List of usage examples for org.apache.hadoop.mapreduce.Job#setJobName
public void setJobName(String name) throws IllegalStateException
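The name set here is only a label: it appears in the ResourceManager / job history UI and in logs. It must be set while the job is still being defined; once the job has been submitted, the call throws IllegalStateException. Below is a minimal self-contained sketch of the typical pattern; the class name, job name, and paths are illustrative placeholders, not taken from the examples that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetJobNameExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(SetJobNameExample.class);
        // Must be called before submit()/waitForCompletion();
        // afterwards setJobName throws IllegalStateException.
        job.setJobName("set-job-name-example");
        FileInputFormat.addInputPath(job, new Path(args[0]));    // illustrative input path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));  // illustrative output path
        // No mapper/reducer set: the identity defaults are used.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}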
From source file:full_MapReduce.C4_5.java
License:Open Source License
private static void findBestAttribute() throws Exception {
    Job job = Job.getInstance();
    job.setJarByClass(C4_5.class);
    job.setJobName("C4.5_findBestAttribute");

    FileInputFormat.addInputPath(job, calc_attributes_info_path);
    FileOutputFormat.setOutputPath(job, best_attribute_result_path);

    job.setMapperClass(FindBestAttributeMapper.class);
    job.setReducerClass(FindBestAttributeReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(AttributeGainRatioWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.waitForCompletion(false);
}
From source file:gaffer.accumulo.bulkimport.BulkImportDriver.java
License:Apache License
public int run(String[] args) throws Exception {
    // Usage
    if (args.length < 3) {
        System.err.println("Usage: " + BulkImportDriver.class.getName()
                + " <inputpath> <output_path> <accumulo_properties_file>");
        return 1;
    }

    // Get paths
    Path inputPath = new Path(args[0]);
    Path outputPath = new Path(args[1] + "/data_for_accumulo/");
    Path splitsFilePath = new Path(args[1] + "/splits_file");
    String accumuloPropertiesFile = args[2];

    // Hadoop configuration
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    // Connect to Accumulo
    AccumuloConfig accConf = new AccumuloConfig(accumuloPropertiesFile);
    Connector conn = Accumulo.connect(accConf);
    String tableName = accConf.getTable();

    // Check whether the table exists
    if (!conn.tableOperations().exists(tableName)) {
        System.err.println("Table " + tableName + " does not exist - create the table before running this");
        return 1;
    }

    // Get the current splits from the table.
    // (This assumes that we have already created the table using InitialiseTable.)
    Collection<Text> splits = conn.tableOperations().getSplits(tableName);
    int numSplits = splits.size();
    System.out.println("Number of splits in table is " + numSplits);

    // Write the current splits to a file (this is needed so that the following
    // MapReduce job can move them to the DistributedCache).
    IngestUtils.createSplitsFile(conn, tableName, fs, splitsFilePath);

    // Run MapReduce to output data suitable for bulk import to Accumulo
    // Conf and job
    conf.setBoolean("mapred.compress.map.output", true);
    conf.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);
    Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setJobName("Convert data to Accumulo format: input = " + inputPath + ", output = " + outputPath);

    // Input
    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, inputPath);

    // Mapper
    job.setMapperClass(BulkImportMapper.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    // Partitioner
    job.setPartitionerClass(KeyRangePartitioner.class);
    KeyRangePartitioner.setSplitFile(job, splitsFilePath.toString());

    // Reducer
    job.setReducerClass(BulkImportReducer.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    job.setNumReduceTasks(numSplits + 1);

    // Output
    job.setOutputFormatClass(AccumuloFileOutputFormat.class);
    AccumuloFileOutputFormat.setOutputPath(job, outputPath);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }
    return 0;
}
From source file:gaffer.accumulo.inputformat.example.ExampleDriver.java
License:Apache License
public int run(String[] args) throws Exception {
    // Usage
    if (args.length != 6 && args.length != 7) {
        System.err.println(USAGE);
        return 1;
    }

    // Parse options
    Path outputPath = new Path(args[0]);
    String accumuloPropertiesFile = args[1];
    int numReduceTasks;
    try {
        numReduceTasks = Integer.parseInt(args[2]);
    } catch (NumberFormatException e) {
        System.err.println(USAGE);
        return 1;
    }
    Date startDate = null;
    Date endDate = null;
    boolean useTimeWindow = false;
    if (!args[3].equals("null") && !args[4].equals("null")) {
        try {
            startDate = DATE_FORMAT.parse(args[3]);
            endDate = DATE_FORMAT.parse(args[4]);
        } catch (ParseException e) {
            System.err.println("Error parsing dates: " + args[3] + " " + args[4] + " " + e.getMessage());
            return 1;
        }
        useTimeWindow = true;
    }
    boolean rollUpOverTimeAndVisibility = Boolean.parseBoolean(args[5]);
    boolean seedsSpecified = (args.length == 7);
    String seedsFile = "";
    if (seedsSpecified) {
        seedsFile = args[6];
    }

    // Hadoop configuration
    Configuration conf = getConf();

    // Connect to Accumulo, so we can check the connection and that the table exists
    AccumuloConfig accConf = new AccumuloConfig(accumuloPropertiesFile);
    Connector conn = Accumulo.connect(accConf);
    String tableName = accConf.getTable();
    Authorizations authorizations = conn.securityOperations().getUserAuthorizations(accConf.getUserName());

    // Check whether the table exists
    if (!conn.tableOperations().exists(tableName)) {
        System.err.println("Table " + tableName + " does not exist.");
        return 1;
    }

    // Create AccumuloBackedGraph and set view
    AccumuloBackedGraph graph = new AccumuloBackedGraph(conn, tableName);
    // - Time window
    if (useTimeWindow) {
        graph.setTimeWindow(startDate, endDate);
    }
    // - Roll up over time and visibility iterator
    graph.rollUpOverTimeAndVisibility(rollUpOverTimeAndVisibility);
    // - If seeds were specified, read them from the seeds file
    if (seedsSpecified) {
        Set<TypeValue> typeValues = new HashSet<TypeValue>();
        BufferedReader reader = new BufferedReader(new FileReader(seedsFile));
        String line;
        while ((line = reader.readLine()) != null) {
            String[] tokens = line.split("\\|");
            if (tokens.length != 2) {
                System.err.println("Invalid line: " + line);
                continue;
            }
            String type = tokens[0];
            String value = tokens[1];
            typeValues.add(new TypeValue(type, value));
        }
        reader.close();
        // Use AccumuloBackedGraph to update the configuration with the view added above
        graph.setConfiguration(conf, typeValues, accConf);
    } else {
        // Use AccumuloBackedGraph to update the configuration with the view added above
        graph.setConfiguration(conf, accConf);
    }

    // Conf
    conf.setBoolean("mapred.compress.map.output", true);
    conf.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);

    // Job
    Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setJobName("Example MapReduce against Gaffer data in Accumulo format: input = " + tableName
            + ", output = " + outputPath);

    // Input format - use BatchScannerElementInputFormat if seeds have been specified (as that creates
    // fewer splits); otherwise use ElementInputFormat, which is based on the standard AccumuloInputFormat.
    if (seedsSpecified) {
        job.setInputFormatClass(BatchScannerElementInputFormat.class);
    } else {
        job.setInputFormatClass(ElementInputFormat.class);
    }

    // Mapper
    job.setMapperClass(ExampleMapper.class);
    job.setMapOutputKeyClass(GraphElement.class);
    job.setMapOutputValueClass(SetOfStatistics.class);

    // Reducer - use the default identity reducer for this example
    job.setOutputKeyClass(GraphElement.class);
    job.setOutputValueClass(SetOfStatistics.class);
    job.setNumReduceTasks(numReduceTasks);

    // Output
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);

    System.out.println("Running MapReduce job over:");
    System.out.println("\tTable: " + accConf.getTable());
    System.out.println("\tUser: " + accConf.getUserName());
    System.out.println("\tAuths: " + authorizations);
    if (useTimeWindow) {
        System.out.println("\tFilter by time: start time is " + DATE_FORMAT.format(startDate) + ", "
                + DATE_FORMAT.format(endDate));
    } else {
        System.out.println("\tFilter by time is off");
    }
    System.out.println("\tRoll up over time and visibility: " + rollUpOverTimeAndVisibility);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }
    return 0;
}
From source file:gaffer.accumulostore.operation.hdfs.handler.job.factory.SampleDataForSplitPointsJobFactory.java
License:Apache License
protected void setupJob(final Job job, final SampleDataForSplitPoints operation, final Store store)
        throws IOException {
    job.setJarByClass(getClass());
    job.setJobName(getJobName(operation.getMapperGeneratorClassName(), new Path(operation.getOutputPath())));

    setupMapper(job, operation, store);
    setupReducer(job, operation, store);
    setupOutput(job, operation, store);
}
From source file:gaffer.accumulostore.operation.hdfs.handler.job.SampleDataForSplitPointsJobFactory.java
License:Apache License
protected void setupJob(final Job job, final SampleDataForSplitPoints operation, final Store store)
        throws IOException {
    job.setJarByClass(getClass());
    job.setJobName(getJobName(operation.getInputPath(), operation.getOutputPath()));

    setupMapper(job, operation, store);
    setupReducer(job, operation, store);
    setupOutput(job, operation, store);
}
From source file:gaffer.analytic.impl.GraphStatistics.java
License:Apache License
public int run(String[] args) throws Exception {
    // Usage
    if (args.length != 6 && args.length != 7) {
        System.err.println(USAGE);
        return 1;
    }

    // Parse options
    Path outputPath = new Path(args[0]);
    String accumuloPropertiesFile = args[1];
    int numReduceTasks;
    try {
        numReduceTasks = Integer.parseInt(args[2]);
    } catch (NumberFormatException e) {
        System.err.println(USAGE);
        return 1;
    }
    Date startDate = null;
    Date endDate = null;
    boolean useTimeWindow = false;
    if (!args[3].equals("null") && !args[4].equals("null")) {
        try {
            startDate = DATE_FORMAT.parse(args[3]);
            endDate = DATE_FORMAT.parse(args[4]);
        } catch (ParseException e) {
            System.err.println("Error parsing dates: " + args[3] + " " + args[4] + " " + e.getMessage());
            return 1;
        }
        useTimeWindow = true;
    }
    boolean rollUpOverTimeAndVisibility = Boolean.parseBoolean(args[5]);
    boolean seedsSpecified = (args.length == 7);
    String seedsFile = "";
    if (seedsSpecified) {
        seedsFile = args[6];
    }

    // Hadoop configuration
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    // Connect to Accumulo, so we can check the connection and that the table exists
    AccumuloConfig accConf = new AccumuloConfig(accumuloPropertiesFile);
    Connector conn = Accumulo.connect(accConf);
    String tableName = accConf.getTable();
    Authorizations authorizations = conn.securityOperations().getUserAuthorizations(accConf.getUserName());

    // Check whether the table exists
    if (!conn.tableOperations().exists(tableName)) {
        System.err.println("Table " + tableName + " does not exist.");
        return 1;
    }

    // Create graph and update configuration based on the view
    AccumuloBackedGraph graph = new AccumuloBackedGraph(conn, tableName);
    if (useTimeWindow) {
        graph.setTimeWindow(startDate, endDate);
    }
    graph.rollUpOverTimeAndVisibility(rollUpOverTimeAndVisibility);
    if (seedsSpecified) {
        Set<TypeValue> typeValues = new HashSet<TypeValue>();
        BufferedReader reader = new BufferedReader(new FileReader(seedsFile));
        String line;
        while ((line = reader.readLine()) != null) {
            String[] tokens = line.split("\\|");
            if (tokens.length != 2) {
                System.err.println("Invalid line: " + line);
                continue;
            }
            String type = tokens[0];
            String value = tokens[1];
            typeValues.add(new TypeValue(type, value));
        }
        reader.close();
        graph.setConfiguration(conf, typeValues, accConf);
    } else {
        graph.setConfiguration(conf, accConf);
    }

    // Conf
    conf.setBoolean("mapred.compress.map.output", true);
    conf.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);

    // Job
    Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setJobName("Running MapReduce against Gaffer data in Accumulo: input = " + tableName
            + ", output = " + outputPath);

    // Input format - use BatchScannerElementInputFormat if seeds have been specified (as that creates
    // fewer splits); otherwise use ElementInputFormat, which is based on the standard AccumuloInputFormat.
    if (seedsSpecified) {
        job.setInputFormatClass(BatchScannerElementInputFormat.class);
    } else {
        job.setInputFormatClass(ElementInputFormat.class);
    }

    // Mapper
    job.setMapperClass(GraphStatisticsMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SetOfStatistics.class);

    // Combiner
    job.setCombinerClass(GraphStatisticsReducer.class);

    // Reducer
    job.setReducerClass(GraphStatisticsReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SetOfStatistics.class);
    job.setNumReduceTasks(numReduceTasks);

    // Output
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);

    System.out.println("Running MapReduce job over:");
    System.out.println("\tTable: " + accConf.getTable());
    System.out.println("\tUser: " + accConf.getUserName());
    System.out.println("\tAuths: " + authorizations);
    if (useTimeWindow) {
        System.out.println("\tFilter by time: start time is " + DATE_FORMAT.format(startDate) + ", "
                + DATE_FORMAT.format(endDate));
    } else {
        System.out.println("\tFilter by time is off");
    }
    System.out.println("\tRoll up over time and visibility: " + rollUpOverTimeAndVisibility);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }

    // Write results out
    System.out.println("Summary of graph");
    for (FileStatus file : fs.listStatus(outputPath)) {
        if (!file.isDirectory() && !file.getPath().getName().contains("_SUCCESS")) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
            Text text = new Text();
            SetOfStatistics stats = new SetOfStatistics();
            while (reader.next(text, stats)) {
                System.out.println(text + ", " + stats);
            }
            reader.close();
        }
    }
    return 0;
}
From source file:gaffer.operation.simple.hdfs.handler.AbstractAddElementsFromHdfsJobFactory.java
License:Apache License
protected void setupJob(final Job job, final AddElementsFromHdfs operation, final Store store)
        throws IOException {
    job.setJarByClass(getClass());
    job.setJobName(getJobName(operation.getInputPath(), operation.getOutputPath()));
}
From source file:gaffer.operation.simple.hdfs.handler.job.factory.AbstractAddElementsFromHdfsJobFactory.java
License:Apache License
protected void setupJob(final Job job, final AddElementsFromHdfs operation, final Store store)
        throws IOException {
    job.setJarByClass(getClass());
    job.setJobName(getJobName(operation.getMapperGeneratorClassName(), operation.getOutputPath()));
}
From source file:gobblin.compaction.mapreduce.MRCompactorJobRunner.java
License:Apache License
protected void configureJob(Job job) throws IOException {
    job.setJobName(HADOOP_JOB_NAME);
    configureInputAndOutputPaths(job);
    configureMapper(job);
    configureReducer(job);
    if (!this.shouldDeduplicate) {
        job.setNumReduceTasks(0);
    }
}
From source file:goraci.Generator.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    // Both arguments are required; checking only for zero args would throw
    // ArrayIndexOutOfBoundsException below when one argument is given.
    if (args.length < 2) {
        System.out.println("Usage : " + Generator.class.getSimpleName() + " <num mappers> <num nodes>");
        return 0;
    }

    int numMappers = Integer.parseInt(args[0]);
    long numNodes = Long.parseLong(args[1]);

    Job job = new Job(getConf());
    job.setJobName("Link Generator");
    job.setNumReduceTasks(0);
    job.setJarByClass(getClass());

    job.setInputFormatClass(GeneratorInputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.getConfiguration().setInt("goraci.generator.mappers", numMappers);
    job.getConfiguration().setLong("goraci.generator.nodes", numNodes);

    job.setMapperClass(GeneratorMapper.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}