Usage examples for org.apache.hadoop.mapreduce.Job.setJobName(String name)
public void setJobName(String name) throws IllegalStateException
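Before the file-by-file examples, here is a minimal, self-contained sketch of the call. The class name, job name string, and argument paths are illustrative placeholders, not taken from any of the source files below.

// Minimal sketch: naming a job before submission.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetJobNameExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // setJobName must be called while the job is still being defined;
        // calling it on a submitted/running job throws IllegalStateException.
        job.setJobName("set-job-name-example");
        job.setJarByClass(SetJobNameExample.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The job name set here is what appears in the ResourceManager UI and job history, which is why the examples below derive it from the driver class name.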
From source file:io.fluo.stress.trie.Init.java
License:Apache License
private int buildTree(int nodeSize, FluoConfiguration props, Path tmp, int stopLevel) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJarByClass(Init.class);
    job.setJobName(Init.class.getName() + "_load");

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.getConfiguration().setInt(TRIE_NODE_SIZE_PROP, nodeSize);
    job.getConfiguration().setInt(TRIE_STOP_LEVEL_PROP, stopLevel);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(tmp, "nums"));

    job.setMapperClass(InitMapper.class);
    job.setCombinerClass(InitCombiner.class);
    job.setReducerClass(InitReducer.class);

    job.setOutputFormatClass(AccumuloFileOutputFormat.class);
    job.setPartitionerClass(RangePartitioner.class);

    FileSystem fs = FileSystem.get(job.getConfiguration());
    Connector conn = AccumuloUtil.getConnector(props);

    Path splitsPath = new Path(tmp, "splits.txt");
    Collection<Text> splits1 = writeSplits(props, fs, conn, splitsPath);
    RangePartitioner.setSplitFile(job, splitsPath.toString());
    job.setNumReduceTasks(splits1.size() + 1);

    Path outPath = new Path(tmp, "out");
    AccumuloFileOutputFormat.setOutputPath(job, outPath);

    boolean success = job.waitForCompletion(true);

    if (success) {
        Path failPath = new Path(tmp, "failures");
        fs.mkdirs(failPath);
        conn.tableOperations().importDirectory(props.getAccumuloTable(), outPath.toString(),
                failPath.toString(), false);
    }
    return success ? 0 : 1;
}
From source file:io.fluo.stress.trie.Load.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        log.error("Usage: " + this.getClass().getSimpleName() + " <fluoProps> <input dir>");
        System.exit(-1);
    }

    FluoConfiguration props = new FluoConfiguration(new File(args[0]));
    Path input = new Path(args[1]);

    Job job = Job.getInstance(getConf());
    job.setJobName(Load.class.getName());
    job.setJarByClass(Load.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, input);

    job.setMapperClass(LoadMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(FluoOutputFormat.class);
    FluoOutputFormat.configure(job, ConfigurationConverter.getProperties(props));

    job.getConfiguration().setBoolean("mapreduce.map.speculative", false);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:io.vitess.hadoop.MapReduceIT.java
License:Apache License
/**
 * Run a mapper-only MR job and verify that all rows in the source table were written to HDFS.
 */
public void testDumpTableToHDFS() throws Exception {
    // Configurations for the job, output from mapper as Text
    Configuration conf = createJobConf();
    Job job = Job.getInstance(conf);
    job.setJobName("table");
    job.setJarByClass(VitessInputFormat.class);
    job.setMapperClass(TableMapper.class);
    VitessInputFormat.setInput(job, "localhost:" + testEnv.getPort(), testEnv.getKeyspace(),
            "select id, name, age from vtgate_test", ImmutableList.<String>of(),
            4 /* splitCount */, 0 /* numRowsPerQueryPart */, Algorithm.EQUAL_SPLITS,
            TestUtil.getRpcClientFactory().getClass());
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(RowWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(0);

    Path outDir = new Path(testEnv.getTestOutputPath(), "mrvitess/output");
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }
    FileOutputFormat.setOutputPath(job, outDir);

    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());

    String[] outputLines = MapReduceTestUtil.readOutput(outDir, conf).split("\n");
    // there should be one line per row in the source table
    assertEquals(NUM_ROWS, outputLines.length);

    Set<Long> actualAges = new HashSet<>();
    Set<String> actualNames = new HashSet<>();

    // Parse and verify we've gotten all the ages and rows.
    Gson gson = new Gson();
    for (String line : outputLines) {
        String[] parts = line.split("\t");
        actualAges.add(Long.valueOf(parts[0]));

        // Rows are written as JSON since this is TextOutputFormat.
        String rowJson = parts[1];
        Type mapType = new TypeToken<Map<String, String>>() {
        }.getType();
        @SuppressWarnings("unchecked")
        Map<String, String> map = (Map<String, String>) gson.fromJson(rowJson, mapType);
        actualNames.add(map.get("name"));
    }

    Set<Long> expectedAges = new HashSet<>();
    Set<String> expectedNames = new HashSet<>();
    for (long i = 1; i <= NUM_ROWS; i++) {
        // Generate values that match TestUtil.insertRows().
        expectedAges.add(i % 10);
        expectedNames.add("name_" + i);
    }
    assertEquals(expectedAges.size(), actualAges.size());
    assertTrue(actualAges.containsAll(expectedAges));
    assertEquals(NUM_ROWS, actualNames.size());
    assertTrue(actualNames.containsAll(expectedNames));
}
From source file:io.vitess.hadoop.MapReduceIT.java
License:Apache License
/**
 * Map all rows and aggregate by age at the reducer.
 */
public void testReducerAggregateRows() throws Exception {
    Configuration conf = createJobConf();
    Job job = Job.getInstance(conf);
    job.setJobName("table");
    job.setJarByClass(VitessInputFormat.class);
    job.setMapperClass(TableMapper.class);
    VitessInputFormat.setInput(job, "localhost:" + testEnv.getPort(), testEnv.getKeyspace(),
            "select id, name, age from vtgate_test", ImmutableList.<String>of(),
            1 /* splitCount */, 0 /* numRowsPerQueryPart */, Algorithm.EQUAL_SPLITS,
            TestUtil.getRpcClientFactory().getClass());
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(RowWritable.class);
    job.setReducerClass(CountReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path outDir = new Path(testEnv.getTestOutputPath(), "mrvitess/output");
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }
    FileOutputFormat.setOutputPath(job, outDir);

    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());

    String[] outputLines = MapReduceTestUtil.readOutput(outDir, conf).split("\n");
    // There should be 10 different ages, because age = i % 10.
    assertEquals(10, outputLines.length);

    // All rows should be accounted for.
    int totalRowsReduced = 0;
    for (String line : outputLines) {
        totalRowsReduced += Integer.parseInt(line);
    }
    assertEquals(NUM_ROWS, totalRowsReduced);
}
From source file:ir.ac.ut.snl.mrcd.StageFour.java
public int run(String[] args)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    System.out.println("enter fuckin run");
    Job job = new Job();
    String input = args[0];
    String output = args[1];
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    job.setJarByClass(StageFour.class);
    job.setJobName("Stage four");
    job.setMapperClass(StageFourMapper.class);
    job.setReducerClass(StageFourReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Edge.class);

    Path inFile = new Path("/home/arian/NetBeansProjects/bscthesis2/topkedgebetweenness");
    Path outFile = new Path("/home/arian/myhadoop/NetBeansProjects/bscthesis2/topkedgebetweenness");
    // fs = FileSystem.get(new Configuration());
    fs = FileSystem.get(configuration);
    FSDataInputStream in = fs.open(inFile);
    FSDataOutputStream out = fs.create(outFile);
    System.out.println("fs is ok");

    int bytesRead = 0;
    byte buffer[] = new byte[256];
    while ((bytesRead = in.read(buffer)) > 0) {
        out.write(buffer, 0, bytesRead);
    }
    in.close();
    out.close();
    System.out.println("copy is ok");

    DistributedCache.addCacheFile(
            new URI("/home/arian/myhadoop/NetBeansProjects/bscthesis2/input3-converted"), configuration);
    DistributedCache.addCacheFile(
            new URI("/home/arian/myhadoop/NetBeansProjects/bscthesis2/topkedgebetweenness"), configuration);

    // URI[] localCacheFilesx = DistributedCache.getCacheFiles(configuration);
    // if (localCacheFilesx == null) {
    //     System.out.println("NULLE BI PEDARsssssssss");
    // }
    // if (localCacheFilesx != null) {
    //     System.out.println("There's something in the cache now.");
    // }

    System.out.println("salam lllaaa");

    // bufferedReader = null;
    // bufferedReader2 = null;
    // scanner = null;
    // scanner2 = null;

    localCacheFiles = DistributedCache.getCacheFiles(configuration);
    if (localCacheFiles == null) {
        System.out.println("NULLE");
    }
    if (localCacheFiles != null) {
        System.out.println("There's something in the cache. an " + localCacheFiles[1].toString());
        // fileReader = new FileReader(localCacheFiles[1].toString());
        fs = FileSystem.get(configuration);
        in = fs.open(new Path(localCacheFiles[1].toString()));
        bufferedReader = new BufferedReader(new InputStreamReader(in));
        scanner = new Scanner(bufferedReader);
        if (!scanner.hasNextLine())
            System.out.println("ay ay AY AY SCANNER nextline nadare!!!!!!!!!!!!");
    }
    // System.out.println("ssssalam sssssxxxxxx23ssa " + scanner.nextLine());

    job.waitForCompletion(true);
    return 0;
}
From source file:ir.ac.ut.snl.mrcd.StageOne.java
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job;
    String input, output;
    int iterationCount = 0;
    long terminationValue = 1;
    boolean result = false;

    while (terminationValue > 0) {
        job = new Job();
        if (iterationCount == 0) {
            input = args[0];
        } else {
            input = args[1] + iterationCount;
        }
        output = args[1] + (iterationCount + 1);
        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));
        job.setJarByClass(StageOne.class);
        job.setJobName("Stage one");
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        job.setMapperClass(StageOneMapper.class);
        job.setReducerClass(StageOneReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(ShortestPathTuple.class);
        result = job.waitForCompletion(true);
        Counters jobCounters = job.getCounters();
        terminationValue = jobCounters.findCounter(StageOneCounter.ALL_ACTIVE).getValue();
        iterationCount++;
    }
    return 0;
}
From source file:ir.ac.ut.snl.mrcd.StageThree.java
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job();
    String input = args[0];
    String output = args[1];
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    job.setJarByClass(StageThree.class);
    job.setJobName("Stage three");
    job.setMapperClass(StageThreeMapper.class);
    job.setReducerClass(StageThreeReducer.class);
    // job.setOutputKeyClass(Text.class);
    // job.setOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(DoubleWritable.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setSortComparatorClass(SortDoubleComparator.class);
    job.waitForCompletion(true);

    Scanner scanner = null;
    try {
        File file = new File("/home/arian/NetBeansProjects/bscthesis2/output/stagethree/part-r-00000");
        FileReader fileReader = new FileReader(file);
        BufferedReader bufferedReader = new BufferedReader(fileReader);
        scanner = new Scanner(bufferedReader);
    } catch (Exception e) {
        System.out.println("NA NASHOD NASHOD NASHOD FILE BAZ NASHOD");
        e.printStackTrace();
    }

    PrintWriter printWriter = new PrintWriter("/home/arian/NetBeansProjects/bscthesis2/topkedgebetweenness",
            "UTF-8");
    int k = 4;
    for (int i = 0; i < k; i++) {
        printWriter.write(scanner.nextLine());
        // if (i != k - 1)
        printWriter.write('\n');
    }
    printWriter.close();
    scanner.close();

    Path inFile = new Path("/home/arian/NetBeansProjects/bscthesis2/topkedgebetweenness");
    Path outFile = new Path("/home/arian/myhadoop/NetBeansProjects/bscthesis2/topkedgebetweenness");
    FileSystem fs = FileSystem.get(new Configuration());
    FSDataInputStream in = fs.open(inFile);
    FSDataOutputStream out = fs.create(outFile);
    int bytesRead = 0;
    byte buffer[] = new byte[256];
    while ((bytesRead = in.read(buffer)) > 0) {
        out.write(buffer, 0, bytesRead);
    }
    in.close();
    out.close();
    return 0;
}
From source file:ir.ac.ut.snl.mrcd.StageTwo.java
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException { System.out.println("too run too run too run"); Job job = new Job(); String input = args[0];// w w w. ja v a 2 s . c o m String output = args[1]; FileInputFormat.addInputPath(job, new Path(input)); FileOutputFormat.setOutputPath(job, new Path(output)); job.setJarByClass(StageTwo.class); job.setJobName("Stage two"); // job.setInputFormatClass(MyFileInputFormat.class); job.setInputFormatClass(KeyValueTextInputFormat.class); job.setMapperClass(StageTwoMapper.class); job.setReducerClass(StageTwoReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(DoubleWritable.class); System.out.println("too run too run too run2222222222222"); job.waitForCompletion(true); return 0; }
From source file:it.crs4.pydoop.mapreduce.pipes.CommandLineParser.java
License:Apache License
public int run(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
        cli.printUsage();
        return 1;
    }
    try {
        Job job = new Job(new Configuration());
        job.setJobName(getClass().getName());
        Configuration conf = job.getConfiguration();
        CommandLine results = cli.parse(conf, args);
        if (results.hasOption("input")) {
            Path path = new Path(results.getOptionValue("input"));
            FileInputFormat.setInputPaths(job, path);
        }
        if (results.hasOption("output")) {
            Path path = new Path(results.getOptionValue("output"));
            FileOutputFormat.setOutputPath(job, path);
        }
        if (results.hasOption("jar")) {
            job.setJar(results.getOptionValue("jar"));
        }
        if (results.hasOption("inputformat")) {
            explicitInputFormat = true;
            setIsJavaRecordReader(conf, true);
            job.setInputFormatClass(getClass(results, "inputformat", conf, InputFormat.class));
        }
        if (results.hasOption("javareader")) {
            setIsJavaRecordReader(conf, true);
        }
        if (results.hasOption("map")) {
            setIsJavaMapper(conf, true);
            job.setMapperClass(getClass(results, "map", conf, Mapper.class));
        }
        if (results.hasOption("partitioner")) {
            job.setPartitionerClass(getClass(results, "partitioner", conf, Partitioner.class));
        }
        if (results.hasOption("reduce")) {
            setIsJavaReducer(conf, true);
            job.setReducerClass(getClass(results, "reduce", conf, Reducer.class));
        }
        if (results.hasOption("reduces")) {
            job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces")));
        }
        if (results.hasOption("writer")) {
            explicitOutputFormat = true;
            setIsJavaRecordWriter(conf, true);
            job.setOutputFormatClass(getClass(results, "writer", conf, OutputFormat.class));
        }
        if (results.hasOption("lazyOutput")) {
            if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) {
                LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormatClass());
            }
        }
        if (results.hasOption("avroInput")) {
            avroInput = AvroIO.valueOf(results.getOptionValue("avroInput").toUpperCase());
        }
        if (results.hasOption("avroOutput")) {
            avroOutput = AvroIO.valueOf(results.getOptionValue("avroOutput").toUpperCase());
        }
        if (results.hasOption("program")) {
            setExecutable(conf, results.getOptionValue("program"));
        }
        // if they gave us a jar file, include it into the class path
        String jarFile = job.getJar();
        if (jarFile != null) {
            final URL[] urls = new URL[] {
                    FileSystem.getLocal(conf).pathToFile(new Path(jarFile)).toURL() };
            // FindBugs complains that creating a URLClassLoader should be
            // in a doPrivileged() block.
            ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
                public ClassLoader run() {
                    return new URLClassLoader(urls);
                }
            });
            conf.setClassLoader(loader);
        }
        setupPipesJob(job);
        return job.waitForCompletion(true) ? 0 : 1;
    } catch (ParseException pe) {
        LOG.info("Error : " + pe);
        cli.printUsage();
        return 1;
    }
}
From source file:it.crs4.pydoop.WriteParquet.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.err.println("Usage: WriteParquet <input path> <output path> <schema path>");
        return -1;
    }
    Path inputPath = new Path(args[0]);
    Path outputPath = new Path(args[1]);
    String schemaPathName = args[2];

    Configuration conf = getConf();
    conf.set(SCHEMA_PATH_KEY, schemaPathName);
    Schema schema = getSchema(conf);

    Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setJobName(getClass().getName());

    AvroParquetOutputFormat.setSchema(job, schema);

    job.setMapperClass(WriteUserMap.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(AvroParquetOutputFormat.class);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.waitForCompletion(true);
    return 0;
}