Example usage for org.apache.hadoop.mapreduce Job setJobName

List of usage examples for org.apache.hadoop.mapreduce Job setJobName

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce Job setJobName.

Prototype

public void setJobName(String name) throws IllegalStateException 

Document

Set the user-specified job name.
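
Before the project examples, here is a minimal, self-contained sketch of the call (the job name "example-job" and the class name are illustrative, not taken from the examples below). The name must be set while the job is still being defined, i.e. before it is submitted; calling setJobName on an already-submitted job throws IllegalStateException.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class SetJobNameExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // Set the name before submission; once the job is running,
        // setJobName throws IllegalStateException.
        job.setJobName("example-job");
        System.out.println("Job name: " + job.getJobName());
    }
}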

Usage

From source file:full_MapReduce.C4_5.java

License:Open Source License

private static void findBestAttribute() throws Exception {
    Job job = Job.getInstance();
    job.setJarByClass(C4_5.class);
    job.setJobName("C4.5_findBestAttribute");

    FileInputFormat.addInputPath(job, calc_attributes_info_path);
    FileOutputFormat.setOutputPath(job, best_attribute_result_path);

    job.setMapperClass(FindBestAttributeMapper.class);
    job.setReducerClass(FindBestAttributeReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(AttributeGainRatioWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.waitForCompletion(false);
}

From source file:gaffer.accumulo.bulkimport.BulkImportDriver.java

License:Apache License

public int run(String[] args) throws Exception {
    // Usage
    if (args.length < 3) {
        System.err.println("Usage: " + BulkImportDriver.class.getName()
                + " <inputpath> <output_path> <accumulo_properties_file>");
        return 1;
    }

    // Gets paths
    Path inputPath = new Path(args[0]);
    Path outputPath = new Path(args[1] + "/data_for_accumulo/");
    Path splitsFilePath = new Path(args[1] + "/splits_file");
    String accumuloPropertiesFile = args[2];

    // Hadoop configuration
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    // Connect to Accumulo
    AccumuloConfig accConf = new AccumuloConfig(accumuloPropertiesFile);
    Connector conn = Accumulo.connect(accConf);
    String tableName = accConf.getTable();

    // Check if the table exists
    if (!conn.tableOperations().exists(tableName)) {
        System.err.println("Table " + tableName + " does not exist - create the table before running this");
        return 1;
    }

    // Get the current splits from the table.
    // (This assumes that we have already created the table using InitialiseTable.)
    Collection<Text> splits = conn.tableOperations().getSplits(tableName);
    int numSplits = splits.size();
    System.out.println("Number of splits in table is " + numSplits);

    // Write current splits to a file (this is needed so that the following MapReduce
    // job can move them to the DistributedCache).
    IngestUtils.createSplitsFile(conn, tableName, fs, splitsFilePath);

    // Run MapReduce to output data suitable for bulk import to Accumulo
    // Conf and job
    conf.setBoolean("mapred.compress.map.output", true);
    conf.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);
    Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setJobName("Convert data to Accumulo format: input = " + inputPath + ", output = " + outputPath);

    // Input
    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, inputPath);

    // Mapper
    job.setMapperClass(BulkImportMapper.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    // Partitioner
    job.setPartitionerClass(KeyRangePartitioner.class);
    KeyRangePartitioner.setSplitFile(job, splitsFilePath.toString());

    // Reducer
    job.setReducerClass(BulkImportReducer.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    job.setNumReduceTasks(numSplits + 1);

    // Output
    job.setOutputFormatClass(AccumuloFileOutputFormat.class);
    AccumuloFileOutputFormat.setOutputPath(job, outputPath);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }

    return 0;
}

From source file:gaffer.accumulo.inputformat.example.ExampleDriver.java

License:Apache License

public int run(String[] args) throws Exception {
    // Usage
    if (args.length != 6 && args.length != 7) {
        System.err.println(USAGE);
        return 1;
    }

    // Parse options
    Path outputPath = new Path(args[0]);
    String accumuloPropertiesFile = args[1];
    int numReduceTasks;
    try {
        numReduceTasks = Integer.parseInt(args[2]);
    } catch (NumberFormatException e) {
        System.err.println(USAGE);
        return 1;
    }
    Date startDate = null;
    Date endDate = null;
    boolean useTimeWindow = false;
    if (!args[3].equals("null") && !args[4].equals("null")) {
        try {
            startDate = DATE_FORMAT.parse(args[3]);
            endDate = DATE_FORMAT.parse(args[4]);
        } catch (ParseException e) {
            System.err.println("Error parsing dates: " + args[3] + " " + args[4] + " " + e.getMessage());
            return 1;
        }
        useTimeWindow = true;
    }
    boolean rollUpOverTimeAndVisibility = Boolean.parseBoolean(args[5]);
    boolean seedsSpecified = (args.length == 7);
    String seedsFile = "";
    if (seedsSpecified) {
        seedsFile = args[6];
    }

    // Hadoop configuration
    Configuration conf = getConf();

    // Connect to Accumulo, so we can check connection and check that the
    // table exists
    AccumuloConfig accConf = new AccumuloConfig(accumuloPropertiesFile);
    Connector conn = Accumulo.connect(accConf);
    String tableName = accConf.getTable();
    Authorizations authorizations = conn.securityOperations().getUserAuthorizations(accConf.getUserName());

    // Check if the table exists
    if (!conn.tableOperations().exists(tableName)) {
        System.err.println("Table " + tableName + " does not exist.");
        return 1;
    }

    // Create AccumuloBackedGraph and set view
    AccumuloBackedGraph graph = new AccumuloBackedGraph(conn, tableName);
    //    - Time window
    if (useTimeWindow) {
        graph.setTimeWindow(startDate, endDate);
    }
    //  - Roll up over time and visibility iterator
    graph.rollUpOverTimeAndVisibility(rollUpOverTimeAndVisibility);
    //    - If seeds are specified, read them from the seeds file; otherwise add an iterator to avoid seeing the same edge multiple times
    if (seedsSpecified) {
        Set<TypeValue> typeValues = new HashSet<TypeValue>();
        BufferedReader reader = new BufferedReader(new FileReader(seedsFile));
        String line;
        while ((line = reader.readLine()) != null) {
            String[] tokens = line.split("\\|");
            if (tokens.length != 2) {
                System.err.println("Invalid line: " + line);
                continue;
            }
            String type = tokens[0];
            String value = tokens[1];
            typeValues.add(new TypeValue(type, value));
        }
        reader.close();
        // Use AccumuloBackedGraph to update the configuration with the view added above
        graph.setConfiguration(conf, typeValues, accConf);
    } else {
        // Use AccumuloBackedGraph to update the configuration with the view added above
        graph.setConfiguration(conf, accConf);
    }

    // Conf
    conf.setBoolean("mapred.compress.map.output", true);
    conf.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);

    // Job
    Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setJobName("Example MapReduce against Gaffer data in Accumulo format: input = " + tableName
            + ", output = " + outputPath);

    // Input format - use BatchScannerElementInputFormat if seeds have been specified (as that creates fewer
    // splits); otherwise use ElementInputFormat which is based on the standard AccumuloInputFormat.
    if (seedsSpecified) {
        job.setInputFormatClass(BatchScannerElementInputFormat.class);
    } else {
        job.setInputFormatClass(ElementInputFormat.class);
    }

    // Mapper
    job.setMapperClass(ExampleMapper.class);
    job.setMapOutputKeyClass(GraphElement.class);
    job.setMapOutputValueClass(SetOfStatistics.class);

    // Reducer - use default IdentityReducer for this example
    job.setOutputKeyClass(GraphElement.class);
    job.setOutputValueClass(SetOfStatistics.class);
    job.setNumReduceTasks(numReduceTasks);

    // Output
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);

    System.out.println("Running MapReduce job over:");
    System.out.println("\tTable: " + accConf.getTable());
    System.out.println("\tUser: " + accConf.getUserName());
    System.out.println("\tAuths: " + authorizations);
    if (useTimeWindow) {
        System.out.println("\tFilter by time: start time is " + DATE_FORMAT.format(startDate) + ", "
                + DATE_FORMAT.format(endDate));
    } else {
        System.out.println("\tFilter by time is off");
    }
    System.out.println("\tRoll up over time and visibility: " + rollUpOverTimeAndVisibility);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }

    return 0;
}

From source file:gaffer.accumulostore.operation.hdfs.handler.job.factory.SampleDataForSplitPointsJobFactory.java

License:Apache License

protected void setupJob(final Job job, final SampleDataForSplitPoints operation, final Store store)
        throws IOException {
    job.setJarByClass(getClass());
    job.setJobName(getJobName(operation.getMapperGeneratorClassName(), new Path(operation.getOutputPath())));
    setupMapper(job, operation, store);
    setupReducer(job, operation, store);
    setupOutput(job, operation, store);
}

From source file:gaffer.accumulostore.operation.hdfs.handler.job.SampleDataForSplitPointsJobFactory.java

License:Apache License

protected void setupJob(final Job job, final SampleDataForSplitPoints operation, final Store store)
        throws IOException {
    job.setJarByClass(getClass());
    job.setJobName(getJobName(operation.getInputPath(), operation.getOutputPath()));
    setupMapper(job, operation, store);
    setupReducer(job, operation, store);
    setupOutput(job, operation, store);
}

From source file:gaffer.analytic.impl.GraphStatistics.java

License:Apache License

public int run(String[] args) throws Exception {
    // Usage
    if (args.length != 6 && args.length != 7) {
        System.err.println(USAGE);
        return 1;
    }

    // Parse options
    Path outputPath = new Path(args[0]);
    String accumuloPropertiesFile = args[1];
    int numReduceTasks;
    try {
        numReduceTasks = Integer.parseInt(args[2]);
    } catch (NumberFormatException e) {
        System.err.println(USAGE);
        return 1;
    }
    Date startDate = null;
    Date endDate = null;
    boolean useTimeWindow = false;
    if (!args[3].equals("null") && !args[4].equals("null")) {
        try {
            startDate = DATE_FORMAT.parse(args[3]);
            endDate = DATE_FORMAT.parse(args[4]);
        } catch (ParseException e) {
            System.err.println("Error parsing dates: " + args[3] + " " + args[4] + " " + e.getMessage());
            return 1;
        }
        useTimeWindow = true;
    }
    boolean rollUpOverTimeAndVisibility = Boolean.parseBoolean(args[5]);
    boolean seedsSpecified = (args.length == 7);
    String seedsFile = "";
    if (seedsSpecified) {
        seedsFile = args[6];
    }

    // Hadoop configuration
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    // Connect to Accumulo, so we can check connection and check that the
    // table exists
    AccumuloConfig accConf = new AccumuloConfig(accumuloPropertiesFile);
    Connector conn = Accumulo.connect(accConf);
    String tableName = accConf.getTable();
    Authorizations authorizations = conn.securityOperations().getUserAuthorizations(accConf.getUserName());

    // Check if the table exists
    if (!conn.tableOperations().exists(tableName)) {
        System.err.println("Table " + tableName + " does not exist.");
        return 1;
    }

    // Create graph and update configuration based on the view
    AccumuloBackedGraph graph = new AccumuloBackedGraph(conn, tableName);
    if (useTimeWindow) {
        graph.setTimeWindow(startDate, endDate);
    }
    graph.rollUpOverTimeAndVisibility(rollUpOverTimeAndVisibility);
    if (seedsSpecified) {
        Set<TypeValue> typeValues = new HashSet<TypeValue>();
        BufferedReader reader = new BufferedReader(new FileReader(seedsFile));
        String line;
        while ((line = reader.readLine()) != null) {
            String[] tokens = line.split("\\|");
            if (tokens.length != 2) {
                System.err.println("Invalid line: " + line);
                continue;
            }
            String type = tokens[0];
            String value = tokens[1];
            typeValues.add(new TypeValue(type, value));
        }
        reader.close();
        graph.setConfiguration(conf, typeValues, accConf);
    } else {
        graph.setConfiguration(conf, accConf);
    }

    // Conf
    conf.setBoolean("mapred.compress.map.output", true);
    conf.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);

    // Job
    Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setJobName("Running MapReduce against Gaffer data in Accumulo: input = " + tableName + ", output = "
            + outputPath);

    // Input format - use BatchScannerElementInputFormat if seeds have been specified (as that creates fewer
    // splits); otherwise use ElementInputFormat which is based on the standard AccumuloInputFormat.
    if (seedsSpecified) {
        job.setInputFormatClass(BatchScannerElementInputFormat.class);
    } else {
        job.setInputFormatClass(ElementInputFormat.class);
    }

    // Mapper
    job.setMapperClass(GraphStatisticsMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SetOfStatistics.class);

    // Combiner
    job.setCombinerClass(GraphStatisticsReducer.class);

    // Reducer
    job.setReducerClass(GraphStatisticsReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SetOfStatistics.class);
    job.setNumReduceTasks(numReduceTasks);

    // Output
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);

    System.out.println("Running MapReduce job over:");
    System.out.println("\tTable: " + accConf.getTable());
    System.out.println("\tUser: " + accConf.getUserName());
    System.out.println("\tAuths: " + authorizations);
    if (useTimeWindow) {
        System.out.println("\tFilter by time: start time is " + DATE_FORMAT.format(startDate) + ", "
                + DATE_FORMAT.format(endDate));
    } else {
        System.out.println("\tFilter by time is off");
    }
    System.out.println("\tRoll up over time and visibility: " + rollUpOverTimeAndVisibility);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }

    // Write results out
    System.out.println("Summary of graph");
    for (FileStatus file : fs.listStatus(outputPath)) {
        if (!file.isDirectory() && !file.getPath().getName().contains("_SUCCESS")) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
            Text text = new Text();
            SetOfStatistics stats = new SetOfStatistics();
            while (reader.next(text, stats)) {
                System.out.println(text + ", " + stats);
            }
            reader.close();
        }
    }

    return 0;
}

From source file:gaffer.operation.simple.hdfs.handler.AbstractAddElementsFromHdfsJobFactory.java

License:Apache License

protected void setupJob(final Job job, final AddElementsFromHdfs operation, final Store store)
        throws IOException {
    job.setJarByClass(getClass());
    job.setJobName(getJobName(operation.getInputPath(), operation.getOutputPath()));
}

From source file:gaffer.operation.simple.hdfs.handler.job.factory.AbstractAddElementsFromHdfsJobFactory.java

License:Apache License

protected void setupJob(final Job job, final AddElementsFromHdfs operation, final Store store)
        throws IOException {
    job.setJarByClass(getClass());
    job.setJobName(getJobName(operation.getMapperGeneratorClassName(), operation.getOutputPath()));
}

From source file:gobblin.compaction.mapreduce.MRCompactorJobRunner.java

License:Apache License

protected void configureJob(Job job) throws IOException {
    job.setJobName(HADOOP_JOB_NAME);
    configureInputAndOutputPaths(job);
    configureMapper(job);
    configureReducer(job);
    if (!this.shouldDeduplicate) {
        job.setNumReduceTasks(0);
    }
}

From source file:goraci.Generator.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    if (args.length < 2) {
        System.out.println("Usage : " + Generator.class.getSimpleName() + " <num mappers> <num nodes>");
        return 0;
    }

    int numMappers = Integer.parseInt(args[0]);
    long numNodes = Long.parseLong(args[1]);

    Job job = new Job(getConf());

    job.setJobName("Link Generator");
    job.setNumReduceTasks(0);
    job.setJarByClass(getClass());

    job.setInputFormatClass(GeneratorInputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.getConfiguration().setInt("goraci.generator.mappers", numMappers);
    job.getConfiguration().setLong("goraci.generator.nodes", numNodes);

    job.setMapperClass(GeneratorMapper.class);

    job.setOutputFormatClass(NullOutputFormat.class);

    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);

    boolean success = job.waitForCompletion(true);

    return success ? 0 : 1;
}