Example usage for org.apache.hadoop.mapred JobConf setMapperClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf setMapperClass.

Prototype

public void setMapperClass(Class<? extends Mapper> theClass)

Source Link

Document

Set the Mapper class for the job.

Usage

From source file:hadoop.UIUCWikifierAppHadoop.java

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    JobConf job = new JobConf(conf, UIUCWikifierAppHadoop.class);

    //      System.out.println("Run.. Envinronment Variables");
    //      java.util.Map<String,String> env = System.getenv();
    ////  w  w w. j a  v a  2s.c om
    //      System.out.println("Printing environment variables");
    //      for(String k : env.keySet()){
    //         System.out.println(k + "\t" + env.get(k));
    //      }
    //      String jlpValue = System.getProperty("java.library.path");
    //      System.out.println("java.library.path=" + jlpValue);
    //      System.setProperty("java.library.path", jlpValue + ":" + "/home/jgilme1/bin/gurobi550/linux64/lib");

    //process command line options
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);

    //change current working directory to hdfs path..
    job.setJobName("entitylinker");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormat(DistributeInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setMapperClass(Map.class);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setNumReduceTasks(0);
    job.setNumMapTasks(Integer.parseInt(args[2]));
    job.set("mapreduce.input.fileinputformat.split.minsize", "0");
    job.set("mapred.child.java.opts", "-Xmx16g");
    job.setNumTasksToExecutePerJvm(-1);
    //job.setMemoryForMapTask(new Long(12288));
    //job.set(JobConf.MAPRED_MAP_TASK_ULIMIT, "12582912");

    String gurobiHomeVariable = "GUROBI_HOME";
    String gurobiHomeValue = "/home/jgilme1/bin/gurobi560/linux64";
    String pathVariable = "PATH";
    String newPathValue = gurobiHomeValue + "/bin";
    String ldLibraryPathVariable = "LD_LIBRARY_PATH";
    String ldLibraryPathValue = gurobiHomeValue + "/lib";
    String grbLicenseFileVariable = "GRB_LICENSE_FILE";
    String grbLicenseFileValue = "/scratch6/usr/jgilme1/gurobiLicense/gurobi.lic";

    StringBuilder newEnvironment = new StringBuilder();
    newEnvironment.append(gurobiHomeVariable);
    newEnvironment.append("=");
    newEnvironment.append(gurobiHomeValue);
    newEnvironment.append(",");
    newEnvironment.append(pathVariable);
    newEnvironment.append("=");
    newEnvironment.append("$" + pathVariable + ":");
    newEnvironment.append(newPathValue);
    newEnvironment.append(",");
    newEnvironment.append(ldLibraryPathVariable);
    newEnvironment.append("=$" + ldLibraryPathVariable + ":");
    newEnvironment.append(ldLibraryPathValue);
    newEnvironment.append(",");
    newEnvironment.append(grbLicenseFileVariable);
    newEnvironment.append("=");
    newEnvironment.append(grbLicenseFileValue);

    //System.out.println(newEnvironment.toString());
    job.set(JobConf.MAPRED_MAP_TASK_ENV, newEnvironment.toString());

    DistributedCache.addCacheArchive(new URI("/user/jgilme1/entitylinking/Wikifier2013.tar.gz"), job);

    JobClient.runJob(job);
    return 0;
}

From source file:hadoopProcesses.testJob.java

public static void start(String[] args) {
    try {/*w ww . j  a va  2s.c  o m*/

        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        map Map = new map();
        conf.setMapperClass(Map.getClass());

        reducer Reduce = new reducer();
        conf.setCombinerClass(Reduce.getClass());
        conf.setReducerClass(Reduce.getClass());

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        Path outputDir = new Path(args[2]);

        outputDir.getFileSystem(conf).delete(outputDir, true);
        FileSystem fs = FileSystem.get(conf);
        fs.delete(outputDir, true);

        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);

        FileSystem FS = FileSystem.get(conf);

        Path src = new Path(FS.getWorkingDirectory() + "/output/part-00000");

        if (FS.exists(src)) {
            System.out.println("\t\t------ Results ------ ");
            /*
             BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(src)));
             String line;
             line = br.readLine();
             while (line != null) {
             System.out.println("\t" + line);
             line = br.readLine();
             }
             */

            List<String> FileList = (new fileInteractions()).readLines(src, conf);
            for (String LocString : FileList) {

                System.out.println(LocString);
            }
        }
    } catch (Exception Exp) {
        Exp.printStackTrace();
    }
}

From source file:hibench.DataGenerator.java

License:Apache License

public void sumUpZipf(Path fin, Path fout, ZipfRandom zipf) throws IOException {
    LOG.info("Summing up Zipfian Id Distirubtion...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = fin.getName() + " -> " + fout.getName();
    job.setJobName(jobname);/*from w  ww .  j  a v a  2  s . co  m*/

    zipf.setJobConf(job);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(SumUpZipfMapper.class);
    job.setReducerClass(SumUpZipfReducer.class);

    job.setNumReduceTasks(1); // Important to sequentially accumulate the required space

    job.setInputFormat(NLineInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    DataPaths.checkHdfsFile(fout, false);

    FileInputFormat.setInputPaths(job, fin);
    FileOutputFormat.setOutputPath(job, fout);

    LOG.info("Running Job: " + jobname);
    LOG.info("Dummy file: " + fin);
    LOG.info("Zipfian sum up file as Ouput: " + fout);
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);
}

From source file:hibench.DataGenerator.java

License:Apache License

public void createZipf(Path fin, Path fout, ZipfRandom zipf) throws IOException {
    LOG.info("Creating Zipfian Id Distirubtion...");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = fin.getName() + " -> " + fout.getName();

    job.setJobName(jobname);/*  ww  w .  j  ava2 s  .c o  m*/

    zipf.setJobConf(job);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CreateZipfDistrMapper.class);

    job.setNumReduceTasks(0);

    job.setInputFormat(NLineInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, fin);
    FileOutputFormat.setOutputPath(job, fout);

    DataPaths.checkHdfsFile(fout, false);

    LOG.info("Running Job: " + jobname);
    LOG.info("Zipfian Sum File: " + fin);
    LOG.info("Zipfian Id distribution as Ouput: " + fout);
    RunningJob jobCreateZipf = JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    long vElems = jobCreateZipf.getCounters().getCounter(MAP_OUTPUT_RECORDS);
    LOG.info("Created " + vElems + " virtual zipfian elements");
    zipf.setVirtElems(vElems);
}

From source file:hibench.DataGenerator.java

License:Apache License

public void createHtmlPages(Path dummy, HtmlConf html) throws IOException {

    LOG.info("Creating Html Pages...");

    Path fout = new Path(dummy.getParent(), "tmp");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create html pages to " + fout.getName();

    job.setJobName(jobname);/*  www.j  av  a2s. c om*/

    html.setJobConf(job);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CreateHtmlPagesMapper.class);

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, dummy);

    // first create result files under tmp folder
    FileOutputFormat.setOutputPath(job, fout);

    // begin from dummy file
    job.setInputFormat(NLineInputFormat.class);

    // use MultipleTextOutputFormat to produce three out files defined
    // in PathConf, i.e., LINK, PAGE_ZLINK_TABLE, PAGE_ZWORD_TABLE
    job.setOutputFormat(HtmlMultipleTextOutputFormat.class);

    LOG.info("Running Job: " + jobname);
    LOG.info("Dummy file: " + dummy);
    LOG.info("Multiple result Html files as <links, words, urls>");
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    // Move result files under tmp into parent path
    // and remove the empty tmp path finally 
    DataPaths.moveFilesToParent(fout);
}

From source file:hibench.PageRankDataGenerator.java

License:Apache License

private void createPageRankNodes() throws IOException {

    LOG.info("Creating PageRank nodes...", null);

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " pagerank nodes";

    job.setJobName(jobname);/*  w w  w .  j a va 2s . c  o  m*/

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, paths.getPath(DataPaths.LINKS));
    job.setInputFormat(TextInputFormat.class);

    if (options.PAGERANK_NODE_BALANCE) {
        /***
         * Balance the output order of nodes, to prevent the running
         * of pagerank bench from potential data skew
         */
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setMapperClass(BalancedLinkNodesMapper.class);
        job.setReducerClass(BalancedLinkNodesReducer.class);
        //         job.setPartitionerClass(ModulusPartitioner.class);

        if (options.reds > 0) {
            job.setNumReduceTasks(options.reds);
        } else {
            job.setNumReduceTasks(DataOptions.getMaxNumReduce());
        }
    } else {
        job.setMapOutputKeyClass(Text.class);

        job.setMapperClass(OutputLinkNodesMapper.class);
        job.setNumReduceTasks(0);
    }

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.VERTICALS));

    LOG.info("Running Job: " + jobname);
    LOG.info("Links file " + paths.getPath(DataPaths.LINKS) + " as input");
    LOG.info("Vertices file " + paths.getResult(DataPaths.VERTICALS) + " as output");
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.VERTICALS));
}

From source file:hibench.PageRankDataGenerator.java

License:Apache License

/***
 * Create pagerank edge table, output link A->B as <A, B> pairs
 * @throws IOException//from   w ww  .jav a  2  s . c  o  m
 */
private void createPageRankLinks() throws IOException {

    LOG.info("Creating PageRank links", null);

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create " + paths.dname + " pagerank links";

    job.setJobName(jobname);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, paths.getPath(DataPaths.T_LINK_PAGE));
    job.setInputFormat(TextInputFormat.class);

    job.setMapperClass(OutputLinkEdgesMapper.class);

    if (options.SEQUENCE_OUT) {
        job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.codecClass) {
        job.set("mapred.output.compression.type", "BLOCK");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, options.codecClass);
    }

    FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.EDGES));

    LOG.info("Running Job: " + jobname);
    LOG.info("Table link-page " + paths.getPath(DataPaths.T_LINK_PAGE) + " as input");
    LOG.info("Edges file " + paths.getResult(DataPaths.EDGES) + " as output");
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    LOG.info("Cleaning temp files...");
    paths.cleanTempFiles(paths.getResult(DataPaths.EDGES));
}

From source file:hitune.analysis.mapreduce.processor.HadoopMetrics.java

License:Apache License

@Override
public void run() {
    // TODO Auto-generated method stub
    long timestamp = System.currentTimeMillis();
    JobConf conf = new JobConf(this.conf, HadoopMetrics.class);
    try {//from  www  . ja v  a2s  . c o  m
        conf.setJobName(this.getClass().getSimpleName() + timestamp);
        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(HadoopMetrics.MapClass.class);
        conf.setReducerClass(SystemLog.ReduceClass.class);
        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);

        conf.setOutputKeyClass(Text.class);

        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));
            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }

        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }

    } catch (Exception e) {

        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }

}

From source file:hitune.analysis.mapreduce.processor.HistoryLog.java

License:Apache License

public void run() {
    // TODO Auto-generated method stub
    long timestamp = System.currentTimeMillis();
    JobConf conf = new JobConf(this.conf, HistoryLog.class);
    try {/* w  w  w  .java 2  s.com*/
        conf.setJobName(this.getClass().getSimpleName() + timestamp);
        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(HistoryLog.MapClass.class);
        conf.setReducerClass(HistoryLog.ReduceClass.class);
        conf.setOutputKeyClass(Text.class);

        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);

        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));

            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }

        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }
}

From source file:hitune.analysis.mapreduce.processor.InstrumentDataflow.java

License:Apache License

@Override
public void run() {
    // TODO Auto-generated method stub

    long timestamp = System.currentTimeMillis();

    JobConf conf = new JobConf(this.conf, InstrumentDataflow.class);
    try {// w w w.jav a 2 s . com
        conf.setJobName(this.getClass().getSimpleName() + timestamp);
        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(InstrumentDataflow.MapClass.class);
        conf.setReducerClass(InstrumentDataflow.ReduceClass.class);
        conf.setOutputKeyClass(Text.class);
        Class<? extends WritableComparable> outputKeyClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass))
                .asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass = Class
                .forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass))
                .asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);

        conf.setOutputValueClass(TextArrayWritable.class);
        conf.setOutputFormat(CSVFileOutputFormat.class);

        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/"
                + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);

        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(temp_outputPaths));

            //FileInputFormat.setInputPathFilter(conf, evtFileFilter.class);
            //conf.setNumReduceTasks(1);

            try {
                JobClient.runJob(conf);
                moveResults(conf, outputPaths, temp_outputPaths);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }

        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }
}