List of usage examples for org.apache.hadoop.mapreduce.Job.setJobName
public void setJobName(String name) throws IllegalStateException
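For orientation, here is a minimal, self-contained driver sketch (not taken from the projects below) showing where setJobName fits in a typical job setup. The class name and paths are hypothetical, and no mapper or reducer is configured, so the job simply passes its input through the default identity classes.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical driver class, for illustration only.
public class SetJobNameExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(SetJobNameExample.class);
        // setJobName must be called before the job is submitted;
        // calling it on a job that is already running throws IllegalStateException.
        job.setJobName("Set Job Name Example");
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // No mapper/reducer set: the default identity classes are used.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}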
From source file:eu.edisonproject.classification.tfidf.mapreduce.WordCountsForDocsDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJarByClass(WordCountsForDocsDriver.class);
    job.setJobName("Word Counts For Docs Driver");

    Path inPath = new Path(args[0]);
    Path outPath = new Path(args[1]);
    FileInputFormat.setInputPaths(job, inPath);
    FileOutputFormat.setOutputPath(job, outPath);
    outPath.getFileSystem(conf).delete(outPath, true);

    job.setMapperClass(WordCountsForDocsMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(WordCountsForDocsReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file:eu.edisonproject.classification.tfidf.mapreduce.WordFrequencyInDocDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    // itemset = new LinkedList<String>();
    // BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(args[2])));
    // String line;
    // while ((line = br.readLine()) != null) {
    //     String[] components = line.split("/");
    //     itemset.add(components[0]);
    // }
    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJarByClass(WordFrequencyInDocDriver.class);
    job.setJobName("Word Frequency In Doc Driver");

    FileSystem fs = FileSystem.get(conf);
    fs.delete(new Path(args[1]), true);

    Path in = new Path(args[0]);
    Path inHdfs = in;
    Path dictionaryLocal = new Path(args[2]);
    Path dictionaryHDFS = dictionaryLocal;
    Path stopwordsLocal = new Path(args[3]);
    Path stopwordsHDFS = stopwordsLocal;

    if (!conf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
        inHdfs = new Path(in.getName());
        fs.delete(inHdfs, true);
        fs.copyFromLocalFile(in, inHdfs);
        fs.deleteOnExit(inHdfs);
        dictionaryHDFS = new Path(dictionaryLocal.getName());
        if (!fs.exists(dictionaryHDFS)) {
            fs.copyFromLocalFile(dictionaryLocal, dictionaryHDFS);
        }
        stopwordsHDFS = new Path(stopwordsLocal.getName());
        if (!fs.exists(stopwordsHDFS)) {
            fs.copyFromLocalFile(stopwordsLocal, stopwordsHDFS);
        }
    }

    FileStatus dictionaryStatus = fs.getFileStatus(dictionaryHDFS);
    dictionaryHDFS = dictionaryStatus.getPath();
    job.addCacheFile(dictionaryHDFS.toUri());
    FileStatus stopwordsStatus = fs.getFileStatus(stopwordsHDFS);
    stopwordsHDFS = stopwordsStatus.getPath();
    job.addCacheFile(stopwordsHDFS.toUri());

    FileInputFormat.setInputPaths(job, inHdfs);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapperClass(WordFrequencyInDocMapper.class);
    AvroJob.setInputKeySchema(job, Document.getClassSchema());
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Integer.class);
    job.setReducerClass(WordFrequencyInDocReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file:eu.edisonproject.training.tfidf.mapreduce.TermWordFrequency.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration jobconf = getConf();
    FileSystem fs = FileSystem.get(jobconf);
    fs.delete(new Path(args[1]), true);

    Path in = new Path(args[0]);
    Path inHdfs = in;
    if (!jobconf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
        inHdfs = new Path(in.getName());
        fs.delete(inHdfs, true);
        fs.copyFromLocalFile(in, inHdfs);
        fs.deleteOnExit(inHdfs);
        FileStatus inHdfsStatus = fs.getFileStatus(inHdfs);
        // Logger.getLogger(TermWordFrequency.class.getName()).log(Level.INFO, "Copied: {0} to: {1}", new Object[]{in.toUri(), inHdfsStatus.getPath().toUri()});
    }

    Job job = Job.getInstance(jobconf);

    Path stopwordsLocal = new Path(args[3]);
    stopwords = new Path(stopwordsLocal.getName());
    fs.delete(stopwords, true);
    fs.copyFromLocalFile(stopwordsLocal, stopwords);
    fs.deleteOnExit(stopwords);
    FileStatus stopwordsStatus = fs.getFileStatus(stopwords);
    stopwords = stopwordsStatus.getPath();
    job.addCacheFile(stopwords.toUri());

    Path localDocs = new Path(args[2]);
    Path hdfsDocs = new Path(localDocs.getName());
    fs.mkdirs(hdfsDocs);
    hdfsDocs = fs.getFileStatus(hdfsDocs).getPath();
    fs.delete(hdfsDocs, true);

    // FileStatus[] stats = fs.listStatus(localDocs);
    File[] stats = new File(localDocs.toString()).listFiles();
    for (File stat : stats) {
        // for (FileStatus stat : stats) {
        Path filePath = new Path(stat.getAbsolutePath());
        if (FilenameUtils.getExtension(filePath.getName()).endsWith("txt")) {
            Path dest = new Path(hdfsDocs.toUri() + "/" + filePath.getName());
            fs.copyFromLocalFile(filePath, dest);
        }
    }
    job.addCacheFile(hdfsDocs.toUri());

    job.setJarByClass(TermWordFrequency.class);
    job.setJobName("Word Frequency Term Driver");

    FileInputFormat.setInputPaths(job, inHdfs);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // job.setInputFormatClass(TextInputFormat.class);
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, inHdfs);
    NLineInputFormat.setNumLinesPerSplit(job, Integer.valueOf(args[4]));
    NLineInputFormat.setMaxInputSplitSize(job, 500);
    Logger.getLogger(TermWordFrequency.class.getName()).log(Level.INFO, "Num. of lines: {0}",
            NLineInputFormat.getNumLinesPerSplit(job));

    job.setMapperClass(TermWordFrequencyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Integer.class);
    job.setReducerClass(TermWordFrequencyReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file:eu.edisonproject.training.tfidf.mapreduce.TFIDF.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (docs == null) {
        docs = new HashSet<>();
    }
    if (docs.isEmpty()) {
        CharArraySet stopWordArraySet = new CharArraySet(ConfigHelper.loadStopWords(args[3]), true);
        cleanStopWord = new StopWord(stopWordArraySet);
        File docsDir = new File(args[2]);
        for (File f : docsDir.listFiles()) {
            if (FilenameUtils.getExtension(f.getName()).endsWith("txt")) {
                ReaderFile rf = new ReaderFile(f.getAbsolutePath());
                cleanStopWord.setDescription(rf.readFile());
                docs.add(cleanStopWord.execute());
            }
        }
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJarByClass(TFIDF.class);
    job.setJobName("IDF");

    Path inPath = new Path(args[0]);
    Path outPath = new Path(args[1]);
    FileInputFormat.setInputPaths(job, inPath);
    FileOutputFormat.setOutputPath(job, outPath);
    outPath.getFileSystem(conf).delete(outPath, true);

    job.setMapperClass(IDFMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(IDFReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file:eu.edisonproject.training.tfidf.mapreduce.WordCountsForDocsDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = Job.getInstance(conf); job.setJarByClass(WordCountsForDocsDriver.class); job.setJobName("Word Counts For Docs Driver"); Path inPath = new Path(args[0]); Path outPath = new Path(args[1]); FileSystem fs = FileSystem.get(conf); fs.delete(outPath, true);/* w w w . ja va 2 s . co m*/ FileInputFormat.setInputPaths(job, inPath); FileOutputFormat.setOutputPath(job, outPath); outPath.getFileSystem(conf).delete(outPath, true); job.setMapperClass(WordCountsForDocsMapper.class); // job.setInputFormatClass(NLineInputFormat.class); // NLineInputFormat.addInputPath(job, inPath); // NLineInputFormat.setNumLinesPerSplit(job, Integer.valueOf(args[2])); // NLineInputFormat.setMaxInputSplitSize(job, 2000); /*Here it is possible put the combiner class job.setCombinerClass(AvroAverageCombiner.class); */ // job.setOutputFormatClass(AvroKeyValueOutputFormat.class); // job.setReducerClass(WordCountsForDocsReducer.class); // AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); // AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.STRING)); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setReducerClass(WordCountsForDocsReducer.class); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:eu.edisonproject.training.tfidf.mapreduce.WordFrequencyInDocDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration jobconf = getConf();
    // if (isHaddopOn()) {
    //     jobconf.set("fs.defaultFS", "hdfs://master.ib.cluster:8020");
    //     jobconf.set("mapred.job.tracker", "localhost:9001");
    // }
    // try {
    new Path(args[1]).getFileSystem(jobconf).delete(new Path(args[1]), true);
    // } catch (java.net.ConnectException ex) {
    //     jobconf.set("fs.defaultFS", "file:///");
    //     jobconf.set("mapred.job.tracker", null);
    //     new Path(args[1]).getFileSystem(jobconf).delete(new Path(args[1]), true);
    // }

    itemset = new LinkedList<String>();
    BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(args[2])));
    String line;
    while ((line = br.readLine()) != null) {
        String[] components = line.split("/");
        itemset.add(components[0]);
    }

    Job job = Job.getInstance(jobconf);
    job.setJarByClass(WordFrequencyInDocDriver.class);
    job.setJobName("Word Frequency In Doc Driver");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapperClass(WordFrequencyInDocMapper.class);
    AvroJob.setInputKeySchema(job, Term.getClassSchema());
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Integer.class);
    job.setReducerClass(WordFrequencyInDocReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file:eu.edisonproject.training.tfidf.mapreduce.WordsGroupByTitleDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    // Configuration config = HBaseConfiguration.create();
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    // TableMapReduceUtil.addDependencyJars(job);
    job.setJarByClass(WordsGroupByTitleDriver.class);
    // This row must be changed
    job.setJobName("Words Group By Title Driver");

    Path inPath = new Path(args[0]);
    Path outPath = new Path(args[1]);
    FileSystem fs = FileSystem.get(conf);
    fs.delete(outPath, true);
    FileInputFormat.setInputPaths(job, inPath);
    FileOutputFormat.setOutputPath(job, outPath);
    outPath.getFileSystem(conf).delete(outPath, true);

    job.setMapperClass(WordsGroupByTitleMapper.class);
    // job.setInputFormatClass(NLineInputFormat.class);
    // NLineInputFormat.addInputPath(job, inPath);
    // NLineInputFormat.setNumLinesPerSplit(job, Integer.valueOf(args[2]));
    // NLineInputFormat.setMaxInputSplitSize(job, 2000);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(WordsGroupByTitleReducer.class);
    // job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    // job.setReducerClass(WordsGroupByTitleReducer.class);
    // AvroJob.setOutputKeySchema(job, TfidfDocument.SCHEMA$);
    // AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.STRING));

    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file:eu.edisonproject.training.tfidf.mapreduce.WordsInCorpusTFIDFDriver.java
License:Apache License
@Override
public int run(String[] rawArgs) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJarByClass(WordsInCorpusTFIDFDriver.class);
    // This row must be changed
    job.setJobName(rawArgs[2]);

    FileSystem fs = FileSystem.get(conf);
    Path inPath = new Path(rawArgs[0]);
    Path outPath = new Path(rawArgs[1]);
    fs.delete(outPath, true);
    FileInputFormat.setInputPaths(job, inPath);
    FileOutputFormat.setOutputPath(job, outPath);

    job.setMapperClass(WordsInCorpusTFIDFMapper.class);
    // job.setInputFormatClass(NLineInputFormat.class);
    // NLineInputFormat.addInputPath(job, inPath);
    // NLineInputFormat.setNumLinesPerSplit(job, Integer.valueOf(rawArgs[3]));
    // NLineInputFormat.setMaxInputSplitSize(job, 2000);
    // job.setInputFormatClass(AvroKeyValueInputFormat.class);
    // job.setMapperClass(WordsInCorpusTFIDFMapper.class);
    // AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.STRING));
    // AvroJob.setInputValueSchema(job, Schema.create(Schema.Type.STRING));
    //
    // job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    // job.setReducerClass(WordsInCorpusTFIDFReducer.class);
    // AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    // AvroJob.setOutputValueSchema(job, Tfidf.getClassSchema());
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(WordsInCorpusTFIDFReducer.class);

    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file:eu.scape_project.tb.wc.archd.hadoop.HadoopArcReaderJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    HadoopJobCliConfig pc = new HadoopJobCliConfig();
    CommandLineParser cmdParser = new PosixParser();
    CommandLine cmd = cmdParser.parse(HadoopJobOptions.OPTIONS, gop.getRemainingArgs());
    if ((args.length == 0) || (cmd.hasOption(HadoopJobOptions.HELP_OPT))) {
        HadoopJobOptions.exit("Usage", 0);
    } else {
        HadoopJobOptions.initOptions(cmd, pc);
    }
    String dir = pc.getDirStr();
    String name = pc.getHadoopJobName();
    if (name == null || name.equals("")) {
        name = "webarc_reader"; // default job name
    }

    Job job = new Job(conf);

    //**********************************************************
    // for debugging in local mode
    // comment out the 2 lines below before switching to pseudo-distributed or fully-distributed mode
    // job.getConfiguration().set("mapred.job.tracker", "local");
    // job.getConfiguration().set("fs.default.name", "local");
    //**********************************************************

    FileInputFormat.setInputPaths(job, new Path(dir));
    String outpath = "output/" + System.currentTimeMillis() + "wcr";
    logger.info("Output directory: " + outpath);
    FileOutputFormat.setOutputPath(job, new Path(outpath));

    job.setJarByClass(HadoopArcReaderJob.class);
    job.setJobName(name);

    //*** Set interface data types
    // We are using LONG because this value can become very large on huge archives.
    // In order to use the combiner function, the map output also needs to be a LONG.
    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    //*** Set up the mapper, combiner and reducer
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    //*** Set the MAP output compression
    //job.getConfiguration().set("mapred.compress.map.output", "true");

    //*** Set input / output format
    job.setInputFormatClass(ArcInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    //*** Start the job and wait for it
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:eu.scape_project.tb.wc.archd.mapreduce.FileCharacterisation.java
License:Apache License
public int run(String[] args) throws Exception {
    // The original snippet left the Job null; it must be created before it is
    // configured, otherwise every call below throws a NullPointerException.
    Job job = Job.getInstance(getConf());
    System.out.println(getConf().get("mapreduce.job.user.classpath.first"));
    for (int i = 0; i < args.length; i++) {
        System.out.println("Arg" + i + ": " + args[i]);
    }

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setJarByClass(FileCharacterisation.class);
    job.setJobName(name);

    //*** Set interface data types
    // We are using LONG because this value can become very large on huge archives.
    // In order to use the combiner function, the map output also needs to be a LONG.
    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    //*** Set up the mapper, combiner and reducer
    job.setMapperClass(TikaMap.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    //*** Set the MAP output compression
    //job.getConfiguration().set("mapred.compress.map.output", "true");

    //*** Set input / output format
    job.setInputFormatClass(ArcInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    //*** Start the job and wait for it
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}