Example usage for org.apache.hadoop.mapreduce Job setJobName


Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setJobName.

Prototype

public void setJobName(String name) throws IllegalStateException 


Document

Set the user-specified job name.
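
Before the collected examples, here is a minimal, self-contained driver sketch (not taken from any of the sources below; the class name and the input/output path arguments are placeholders) showing where setJobName typically fits. The name must be set while the job is still being defined; once the job has been submitted, setJobName throws IllegalStateException.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical driver, for illustration only.
public class SetJobNameExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());

        // The job name is what appears in the ResourceManager / JobHistory UI.
        // It must be set before submission; afterwards setJobName throws
        // IllegalStateException.
        job.setJobName(SetJobNameExample.class.getSimpleName() + "_demo");

        job.setJarByClass(SetJobNameExample.class);
        job.setMapperClass(Mapper.class); // identity mapper, map-only job
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}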

Usage

From source file:io.fluo.stress.trie.Init.java

License:Apache License

private int buildTree(int nodeSize, FluoConfiguration props, Path tmp, int stopLevel) throws Exception {
    Job job = Job.getInstance(getConf());

    job.setJarByClass(Init.class);

    job.setJobName(Init.class.getName() + "_load");

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.getConfiguration().setInt(TRIE_NODE_SIZE_PROP, nodeSize);
    job.getConfiguration().setInt(TRIE_STOP_LEVEL_PROP, stopLevel);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(tmp, "nums"));

    job.setMapperClass(InitMapper.class);
    job.setCombinerClass(InitCombiner.class);
    job.setReducerClass(InitReducer.class);

    job.setOutputFormatClass(AccumuloFileOutputFormat.class);

    job.setPartitionerClass(RangePartitioner.class);

    FileSystem fs = FileSystem.get(job.getConfiguration());
    Connector conn = AccumuloUtil.getConnector(props);

    Path splitsPath = new Path(tmp, "splits.txt");

    Collection<Text> splits1 = writeSplits(props, fs, conn, splitsPath);

    RangePartitioner.setSplitFile(job, splitsPath.toString());
    job.setNumReduceTasks(splits1.size() + 1);

    Path outPath = new Path(tmp, "out");
    AccumuloFileOutputFormat.setOutputPath(job, outPath);

    boolean success = job.waitForCompletion(true);

    if (success) {
        Path failPath = new Path(tmp, "failures");
        fs.mkdirs(failPath);
        conn.tableOperations().importDirectory(props.getAccumuloTable(), outPath.toString(),
                failPath.toString(), false);
    }
    return success ? 0 : 1;
}

From source file:io.fluo.stress.trie.Load.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    if (args.length != 2) {
        log.error("Usage: " + this.getClass().getSimpleName() + "<fluoProps> <input dir>");
        System.exit(-1);
    }

    FluoConfiguration props = new FluoConfiguration(new File(args[0]));
    Path input = new Path(args[1]);

    Job job = Job.getInstance(getConf());

    job.setJobName(Load.class.getName());

    job.setJarByClass(Load.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, input);

    job.setMapperClass(LoadMapper.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(FluoOutputFormat.class);
    FluoOutputFormat.configure(job, ConfigurationConverter.getProperties(props));

    job.getConfiguration().setBoolean("mapreduce.map.speculative", false);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file:io.vitess.hadoop.MapReduceIT.java

License:Apache License

/**
 * Run a mapper-only MR job and verify all the rows in the source table were output to HDFS.
 */
public void testDumpTableToHDFS() throws Exception {
    // Configurations for the job, output from mapper as Text
    Configuration conf = createJobConf();
    Job job = Job.getInstance(conf);
    job.setJobName("table");
    job.setJarByClass(VitessInputFormat.class);
    job.setMapperClass(TableMapper.class);
    VitessInputFormat.setInput(job, "localhost:" + testEnv.getPort(), testEnv.getKeyspace(),
            "select id, name, age from vtgate_test", ImmutableList.<String>of(), 4 /* splitCount */,
            0 /* numRowsPerQueryPart */, Algorithm.EQUAL_SPLITS, TestUtil.getRpcClientFactory().getClass());
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(RowWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(0);

    Path outDir = new Path(testEnv.getTestOutputPath(), "mrvitess/output");
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }
    FileOutputFormat.setOutputPath(job, outDir);

    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());

    String[] outputLines = MapReduceTestUtil.readOutput(outDir, conf).split("\n");
    // there should be one line per row in the source table
    assertEquals(NUM_ROWS, outputLines.length);
    Set<Long> actualAges = new HashSet<>();
    Set<String> actualNames = new HashSet<>();

    // Parse and verify we've gotten all the ages and rows.
    Gson gson = new Gson();
    for (String line : outputLines) {
        String[] parts = line.split("\t");
        actualAges.add(Long.valueOf(parts[0]));

        // Rows are written as JSON since this is TextOutputFormat.
        String rowJson = parts[1];
        Type mapType = new TypeToken<Map<String, String>>() {
        }.getType();
        @SuppressWarnings("unchecked")
        Map<String, String> map = (Map<String, String>) gson.fromJson(rowJson, mapType);
        actualNames.add(map.get("name"));
    }

    Set<Long> expectedAges = new HashSet<>();
    Set<String> expectedNames = new HashSet<>();
    for (long i = 1; i <= NUM_ROWS; i++) {
        // Generate values that match TestUtil.insertRows().
        expectedAges.add(i % 10);
        expectedNames.add("name_" + i);
    }
    assertEquals(expectedAges.size(), actualAges.size());
    assertTrue(actualAges.containsAll(expectedAges));
    assertEquals(NUM_ROWS, actualNames.size());
    assertTrue(actualNames.containsAll(expectedNames));
}

From source file:io.vitess.hadoop.MapReduceIT.java

License:Apache License

/**
 * Map all rows and aggregate by age at the reducer.
 */
public void testReducerAggregateRows() throws Exception {
    Configuration conf = createJobConf();

    Job job = Job.getInstance(conf);
    job.setJobName("table");
    job.setJarByClass(VitessInputFormat.class);
    job.setMapperClass(TableMapper.class);
    VitessInputFormat.setInput(job, "localhost:" + testEnv.getPort(), testEnv.getKeyspace(),
            "select id, name, age from vtgate_test", ImmutableList.<String>of(), 1 /* splitCount */,
            0 /* numRowsPerQueryPart */, Algorithm.EQUAL_SPLITS, TestUtil.getRpcClientFactory().getClass());

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(RowWritable.class);

    job.setReducerClass(CountReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    Path outDir = new Path(testEnv.getTestOutputPath(), "mrvitess/output");
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }
    FileOutputFormat.setOutputPath(job, outDir);

    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());

    String[] outputLines = MapReduceTestUtil.readOutput(outDir, conf).split("\n");
    // There should be 10 different ages, because age = i % 10.
    assertEquals(10, outputLines.length);
    // All rows should be accounted for.
    int totalRowsReduced = 0;
    for (String line : outputLines) {
        totalRowsReduced += Integer.parseInt(line);
    }
    assertEquals(NUM_ROWS, totalRowsReduced);
}

From source file:ir.ac.ut.snl.mrcd.StageFour.java

public int run(String[] args)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    System.out.println("enter fuckin run");
    Job job = new Job();
    String input = args[0];
    String output = args[1];
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    job.setJarByClass(StageFour.class);
    job.setJobName("Stage four");
    job.setMapperClass(StageFourMapper.class);
    job.setReducerClass(StageFourReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Edge.class);

    Path inFile = new Path("/home/arian/NetBeansProjects/bscthesis2/topkedgebetweenness");
    Path outFile = new Path("/home/arian/myhadoop/NetBeansProjects/bscthesis2/topkedgebetweenness");
    //        fs = FileSystem.get(new Configuration());
    fs = FileSystem.get(configuration);
    FSDataInputStream in = fs.open(inFile);
    FSDataOutputStream out = fs.create(outFile);
    System.out.println("fs is ok");
    int bytesRead = 0;
    byte buffer[] = new byte[256];
    while ((bytesRead = in.read(buffer)) > 0) {
        out.write(buffer, 0, bytesRead);
    }
    in.close();
    out.close();

    System.out.println("copy is ok");

    DistributedCache.addCacheFile(new URI("/home/arian/myhadoop/NetBeansProjects/bscthesis2/input3-converted"),
            configuration);
    DistributedCache.addCacheFile(
            new URI("/home/arian/myhadoop/NetBeansProjects/bscthesis2/topkedgebetweenness"), configuration);
    //        URI[] localCacheFilesx = DistributedCache.getCacheFiles(configuration);
    //        if (localCacheFilesx == null) {
    //            System.out.println("NULLE BI PEDARsssssssss");
    //        }
    //        if (localCacheFilesx != null) {
    //            System.out.println("There's something in the cache now.");
    //        }

    System.out.println("salam lllaaa");

    //        bufferedReader = null;
    //        bufferedReader2 = null;
    //        scanner = null;
    //        scanner2 = null;
    localCacheFiles = DistributedCache.getCacheFiles(configuration);
    if (localCacheFiles == null) {
        System.out.println("NULLE");
    }
    if (localCacheFiles != null) {
        System.out.println("There's something in the cache. an    " + localCacheFiles[1].toString());
        //                fileReader = new FileReader(localCacheFiles[1].toString());
        fs = FileSystem.get(configuration);
        in = fs.open(new Path(localCacheFiles[1].toString()));
        bufferedReader = new BufferedReader(new InputStreamReader(in));
        scanner = new Scanner(bufferedReader);
        if (!scanner.hasNextLine())
            System.out.println("ay ay AY AY SCANNER nextline nadare!!!!!!!!!!!!");
    }

    //        System.out.println("ssssalam sssssxxxxxx23ssa     " + scanner.nextLine());
    job.waitForCompletion(true);
    return 0;
}

From source file:ir.ac.ut.snl.mrcd.StageOne.java

public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job;
    String input, output;

    int iterationCount = 0;
    long terminationValue = 1;

    boolean result = false;

    while (terminationValue > 0) {
        job = new Job();

        if (iterationCount == 0) {
            input = args[0];
        } else {
            input = args[1] + iterationCount;
        }

        output = args[1] + (iterationCount + 1);

        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        job.setJarByClass(StageOne.class);
        job.setJobName("Stage one");

        job.setInputFormatClass(KeyValueTextInputFormat.class);
        job.setMapperClass(StageOneMapper.class);
        job.setReducerClass(StageOneReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(ShortestPathTuple.class);

        result = job.waitForCompletion(true);

        Counters jobCounters = job.getCounters();
        terminationValue = jobCounters.findCounter(StageOneCounter.ALL_ACTIVE).getValue();
        iterationCount++;
    }

    return 0;
}

From source file:ir.ac.ut.snl.mrcd.StageThree.java

public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job();
    String input = args[0];
    String output = args[1];
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    job.setJarByClass(StageThree.class);
    job.setJobName("Stage three");
    job.setMapperClass(StageThreeMapper.class);
    job.setReducerClass(StageThreeReducer.class);
    //        job.setOutputKeyClass(Text.class);
    //        job.setOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(DoubleWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setSortComparatorClass(SortDoubleComparator.class);

    job.waitForCompletion(true);

    Scanner scanner = null;
    try {
        File file = new File("/home/arian/NetBeansProjects/bscthesis2/output/stagethree/part-r-00000");
        FileReader fileReader = new FileReader(file);
        BufferedReader bufferedReader = new BufferedReader(fileReader);
        scanner = new Scanner(bufferedReader);
    } catch (Exception e) {
        System.out.println("NA NASHOD NASHOD NASHOD FILE BAZ NASHOD");
        e.printStackTrace();
    }

    PrintWriter printWriter = new PrintWriter("/home/arian/NetBeansProjects/bscthesis2/topkedgebetweenness",
            "UTF-8");

    int k = 4;
    for (int i = 0; i < k; i++) {
        printWriter.write(scanner.nextLine());
        //            if (i != k - 1)
        printWriter.write('\n');
    }
    printWriter.close();
    scanner.close();

    Path inFile = new Path("/home/arian/NetBeansProjects/bscthesis2/topkedgebetweenness");
    Path outFile = new Path("/home/arian/myhadoop/NetBeansProjects/bscthesis2/topkedgebetweenness");
    FileSystem fs = FileSystem.get(new Configuration());
    FSDataInputStream in = fs.open(inFile);
    FSDataOutputStream out = fs.create(outFile);

    int bytesRead = 0;
    byte buffer[] = new byte[256];
    while ((bytesRead = in.read(buffer)) > 0) {
        out.write(buffer, 0, bytesRead);
    }
    in.close();
    out.close();

    return 0;
}

From source file:ir.ac.ut.snl.mrcd.StageTwo.java

public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    System.out.println("too run too run too run");
    Job job = new Job();
    String input = args[0];
    String output = args[1];
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    job.setJarByClass(StageTwo.class);
    job.setJobName("Stage two");
    //        job.setInputFormatClass(MyFileInputFormat.class);
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setMapperClass(StageTwoMapper.class);
    job.setReducerClass(StageTwoReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    System.out.println("too run too run too run2222222222222");
    job.waitForCompletion(true);
    return 0;
}

From source file:it.crs4.pydoop.mapreduce.pipes.CommandLineParser.java

License:Apache License

public int run(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
        cli.printUsage();
        return 1;
    }
    try {
        Job job = new Job(new Configuration());
        job.setJobName(getClass().getName());
        Configuration conf = job.getConfiguration();
        CommandLine results = cli.parse(conf, args);
        if (results.hasOption("input")) {
            Path path = new Path(results.getOptionValue("input"));
            FileInputFormat.setInputPaths(job, path);
        }
        if (results.hasOption("output")) {
            Path path = new Path(results.getOptionValue("output"));
            FileOutputFormat.setOutputPath(job, path);
        }
        if (results.hasOption("jar")) {
            job.setJar(results.getOptionValue("jar"));
        }
        if (results.hasOption("inputformat")) {
            explicitInputFormat = true;
            setIsJavaRecordReader(conf, true);
            job.setInputFormatClass(getClass(results, "inputformat", conf, InputFormat.class));
        }
        if (results.hasOption("javareader")) {
            setIsJavaRecordReader(conf, true);
        }
        if (results.hasOption("map")) {
            setIsJavaMapper(conf, true);
            job.setMapperClass(getClass(results, "map", conf, Mapper.class));
        }
        if (results.hasOption("partitioner")) {
            job.setPartitionerClass(getClass(results, "partitioner", conf, Partitioner.class));
        }
        if (results.hasOption("reduce")) {
            setIsJavaReducer(conf, true);
            job.setReducerClass(getClass(results, "reduce", conf, Reducer.class));
        }
        if (results.hasOption("reduces")) {
            job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces")));
        }
        if (results.hasOption("writer")) {
            explicitOutputFormat = true;
            setIsJavaRecordWriter(conf, true);
            job.setOutputFormatClass(getClass(results, "writer", conf, OutputFormat.class));
        }
        if (results.hasOption("lazyOutput")) {
            if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) {
                LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormatClass());
            }
        }
        if (results.hasOption("avroInput")) {
            avroInput = AvroIO.valueOf(results.getOptionValue("avroInput").toUpperCase());
        }
        if (results.hasOption("avroOutput")) {
            avroOutput = AvroIO.valueOf(results.getOptionValue("avroOutput").toUpperCase());
        }

        if (results.hasOption("program")) {
            setExecutable(conf, results.getOptionValue("program"));
        }
        // if they gave us a jar file, include it into the class path
        String jarFile = job.getJar();
        if (jarFile != null) {
            final URL[] urls = new URL[] { FileSystem.getLocal(conf).pathToFile(new Path(jarFile)).toURL() };
            // FindBugs complains that creating a URLClassLoader should be
            // in a doPrivileged() block.
            ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
                public ClassLoader run() {
                    return new URLClassLoader(urls);
                }
            });
            conf.setClassLoader(loader);
        }
        setupPipesJob(job);
        return job.waitForCompletion(true) ? 0 : 1;
    } catch (ParseException pe) {
        LOG.info("Error : " + pe);
        cli.printUsage();
        return 1;
    }
}

From source file:it.crs4.pydoop.WriteParquet.java

License:Apache License

public int run(String[] args) throws Exception {

    if (args.length < 3) {
        System.err.println("Usage: WriteParquet <input path> <output path> <schema path>");
        return -1;
    }
    Path inputPath = new Path(args[0]);
    Path outputPath = new Path(args[1]);
    String schemaPathName = args[2];

    Configuration conf = getConf();
    conf.set(SCHEMA_PATH_KEY, schemaPathName);
    Schema schema = getSchema(conf);

    Job job = new Job(conf);
    job.setJarByClass(getClass());
    job.setJobName(getClass().getName());

    AvroParquetOutputFormat.setSchema(job, schema);

    job.setMapperClass(WriteUserMap.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(AvroParquetOutputFormat.class);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.waitForCompletion(true);

    return 0;
}