Example usage for org.apache.hadoop.mapreduce Job setCombinerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#setCombinerClass, drawn from open-source projects.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Document

Set the combiner class for the job.
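
The class passed to setCombinerClass must extend Reducer, and because Hadoop may run the combiner zero, one, or several times over map-side spills, its output key/value types must match its input types (the map output types). The call throws IllegalStateException if the job has already been submitted. A minimal sketch of a valid combiner (the class name is illustrative; imports from org.apache.hadoop.io and org.apache.hadoop.mapreduce are assumed, as elsewhere on this page):

public class SumCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
    private final IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values)
            sum += value.get();         // partial, order-independent aggregation
        result.set(sum);
        context.write(key, result);     // output types equal input types
    }
}

// during job setup, before submission:
job.setCombinerClass(SumCombiner.class);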

Usage

From source file:mvm.rya.accumulo.mr.fileinput.BulkNtripsInputTool.java

License:Apache License

@Override
public int run(final String[] args) throws Exception {
    final Configuration conf = getConf();
    try {
        // read tool configuration, keeping the current field values as defaults
        zk = conf.get(MRUtils.AC_ZK_PROP, zk);
        ttl = conf.get(MRUtils.AC_TTL_PROP, ttl);
        instance = conf.get(MRUtils.AC_INSTANCE_PROP, instance);
        userName = conf.get(MRUtils.AC_USERNAME_PROP, userName);
        pwd = conf.get(MRUtils.AC_PWD_PROP, pwd);
        workDirBase = conf.get(WORKDIR_PROP, workDirBase);
        format = conf.get(MRUtils.FORMAT_PROP, format);
        conf.set(MRUtils.FORMAT_PROP, format);
        final String inputDir = args[0];

        ZooKeeperInstance zooKeeperInstance = new ZooKeeperInstance(instance, zk);
        Connector connector = zooKeeperInstance.getConnector(userName, new PasswordToken(pwd));
        TableOperations tableOperations = connector.tableOperations();

        if (conf.get(AccumuloRdfConfiguration.CONF_ADDITIONAL_INDEXERS) != null) {
            throw new IllegalArgumentException("Cannot use Bulk N Trips tool with Additional Indexers");
        }

        String tablePrefix = conf.get(MRUtils.TABLE_PREFIX_PROPERTY, null);
        if (tablePrefix != null)
            RdfCloudTripleStoreConstants.prefixTables(tablePrefix);
        String[] tables = { tablePrefix + RdfCloudTripleStoreConstants.TBL_OSP_SUFFIX,
                tablePrefix + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX,
                tablePrefix + RdfCloudTripleStoreConstants.TBL_PO_SUFFIX };
        Collection<Job> jobs = new ArrayList<Job>();
        for (final String tableName : tables) {
            PrintStream out = null;
            try {
                String workDir = workDirBase + "/" + tableName;
                System.out.println("Loading data into table[" + tableName + "]");

                Job job = new Job(new Configuration(conf),
                        "Bulk Ingest load data to Generic RDF Table[" + tableName + "]");
                job.setJarByClass(this.getClass());
                // settings for a long-running job: no speculative execution, compressed map output
                Configuration jobConf = job.getConfiguration();
                jobConf.setBoolean("mapred.map.tasks.speculative.execution", false);
                jobConf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
                jobConf.set("io.sort.mb", jobConf.get("io.sort.mb", "256"));
                jobConf.setBoolean("mapred.compress.map.output", true);
                //                    jobConf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec"); //TODO: I would like LZO compression

                job.setInputFormatClass(TextInputFormat.class);

                job.setMapperClass(ParseNtripsMapper.class);
                job.setMapOutputKeyClass(Key.class);
                job.setMapOutputValueClass(Value.class);

                job.setCombinerClass(OutStmtMutationsReducer.class);
                job.setReducerClass(OutStmtMutationsReducer.class);
                job.setOutputFormatClass(AccumuloFileOutputFormat.class);
                // AccumuloFileOutputFormat.setZooKeeperInstance(jobConf, instance, zk);

                jobConf.set(ParseNtripsMapper.TABLE_PROPERTY, tableName);

                TextInputFormat.setInputPaths(job, new Path(inputDir));

                FileSystem fs = FileSystem.get(conf);
                Path workPath = new Path(workDir);
                if (fs.exists(workPath))
                    fs.delete(workPath, true);

                //make failures dir
                Path failures = new Path(workDir, "failures");
                fs.delete(failures, true);
                fs.mkdirs(new Path(workDir, "failures"));

                AccumuloFileOutputFormat.setOutputPath(job, new Path(workDir + "/files"));

                out = new PrintStream(new BufferedOutputStream(fs.create(new Path(workDir + "/splits.txt"))));

                if (!tableOperations.exists(tableName))
                    tableOperations.create(tableName);
                Collection<Text> splits = tableOperations.getSplits(tableName, Integer.MAX_VALUE);
                for (Text split : splits)
                    out.println(new String(Base64.encodeBase64(TextUtil.getBytes(split))));

                job.setNumReduceTasks(splits.size() + 1);
                out.close();

                job.setPartitionerClass(KeyRangePartitioner.class);
                RangePartitioner.setSplitFile(job, workDir + "/splits.txt");

                jobConf.set(WORKDIR_PROP, workDir);

                job.submit();
                jobs.add(job);

            } catch (Exception re) {
                throw new RuntimeException(re);
            } finally {
                if (out != null)
                    out.close();
            }
        }

        for (Job job : jobs) {
            while (!job.isComplete()) {
                Thread.sleep(1000);
            }
        }

        for (String tableName : tables) {
            String workDir = workDirBase + "/" + tableName;
            String filesDir = workDir + "/files";
            String failuresDir = workDir + "/failures";

            FileSystem fs = FileSystem.get(conf);

            // make sure the "accumulo" user can read/write/execute these directories
            fs.setPermission(new Path(filesDir), new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
            fs.setPermission(new Path(failuresDir), new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));

            tableOperations.importDirectory(tableName, filesDir, failuresDir, false);

        }

    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    return 0;
}
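
Note that this job registers the same class, OutStmtMutationsReducer, as both combiner and reducer. That only works because the class consumes and produces the same key/value types (Accumulo Key/Value, matching the map output), so partially combined records can safely pass through the reduce phase again; the reduce output is written as RFiles by AccumuloFileOutputFormat and then bulk-loaded with importDirectory.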

From source file:mvm.rya.joinselect.mr.FullTableSize.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    String inTable = conf.get(SPO_TABLE);
    String outTable = conf.get(SELECTIVITY_TABLE);
    String auths = conf.get(AUTHS);

    assert inTable != null && outTable != null;

    Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());
    JoinSelectStatsUtil.initTableMRJob(job, inTable, outTable, auths);
    job.setMapperClass(FullTableMapper.class);
    job.setCombinerClass(FullTableCombiner.class);
    job.setReducerClass(FullTableReducer.class);
    job.setNumReduceTasks(1);

    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;
}
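
Unlike the previous example, this job pairs a dedicated FullTableCombiner with a separate FullTableReducer instead of reusing the reducer. A distinct combiner is required whenever the reduce logic cannot be applied to its own output, most commonly because the reducer emits a different value type than it consumes. FullTableCombiner's implementation is not shown in the listing; a hypothetical sketch of such a pairing (all names below are illustrative):

// combiner: partial sums; input and output types are identical
public class CountCombiner extends Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0;
        for (LongWritable value : values)
            sum += value.get();
        context.write(key, new LongWritable(sum));
    }
}

// reducer: final sum, but it emits a different value type, so it
// could not have doubled as the combiner
public class CountReducer extends Reducer<Text, LongWritable, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0;
        for (LongWritable value : values)
            sum += value.get();
        context.write(key, new Text(Long.toString(sum)));
    }
}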

From source file:mvm.rya.joinselect.mr.JoinSelectStatisticsSum.java

License:Apache License

@Override
public int run(String[] args)
        throws AccumuloSecurityException, IOException, ClassNotFoundException, InterruptedException {

    Configuration conf = getConf();
    String outTable = conf.get(SELECTIVITY_TABLE);
    String auths = conf.get(AUTHS);
    String inPath = conf.get(INPUTPATH);

    assert outTable != null && inPath != null;

    Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());
    JoinSelectStatsUtil.initSumMRJob(job, inPath, outTable, auths);

    job.setMapperClass(CardinalityIdentityMapper.class);
    job.setCombinerClass(CardinalityIdentityCombiner.class);
    job.setReducerClass(CardinalityIdentityReducer.class);
    job.setNumReduceTasks(32);

    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;

}

From source file:mx.iteso.msc.asn.mrwordcount.MyDriver.java

License:Apache License

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "WordCounter");
    job.setJarByClass(MyDriver.class);
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:myGrep.Grep.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    System.out.println("0:" + args[0] + " 1:" + args[1] + " 2:" + args[2]);

    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4)
        conf.set(RegexMapper.GROUP, args[3]);

    Job grepJob = Job.getInstance(conf);

    try {
        init_log();

        grepJob.setJobName("wzl-grep-search");
        grepJob.setJarByClass(Grep.class);

        FileInputFormat.setInputPaths(grepJob, args[0]);

        //grepJob.setMapperClass(RegexMapper.class);
        grepJob.setMapperClass(myRegMapper.class);

        // (word, 1) pairs are partially summed by the combiner, then totaled by the reducer
        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        //FileOutputFormat.setOutputPath(grepJob, tempDir);
        FileOutputFormat.setOutputPath(grepJob, new Path(args[1]));
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        grepJob.waitForCompletion(true);

        // A follow-up sort job that read tempDir back in, inverse-mapped
        // the pairs, and sorted them by decreasing frequency into args[1]
        // has been disabled here; tempDir is only cleaned up below.
    } finally {
        FileSystem.get(conf).delete(tempDir, true);
    }
    return 0;
}
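
Here both the combiner and the reducer are the stock LongSumReducer (in the new API, org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer), which simply sums the LongWritable values for each key; for a plain counting job like grep, no custom reducer or combiner class is needed at all.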

From source file:mylab0.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:mylab0.WordCountMultipleInputs.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: wordcountmultipleinputs <input1> <input2> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count multiple inputs");
    job.setJarByClass(WordCountMultipleInputs.class);
    MultipleInputs.addInputPath(job, new Path(otherArgs[0]), TextInputFormat.class, Mapper1.class);
    MultipleInputs.addInputPath(job, new Path(otherArgs[1]), TextInputFormat.class, Mapper2.class);

    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(1);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
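
When MultipleInputs routes two different mappers into the same job, both must emit the key/value types declared with setMapOutputKeyClass and setMapOutputValueClass (Text/IntWritable here), otherwise the shared IntSumReducer combiner cannot be applied to their output. Mapper1 and Mapper2 are not shown in the listing; a plausible sketch of one of them (the tokenizing logic is an assumption):

public static class Mapper1 extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable offset, Text line, Context context)
            throws IOException, InterruptedException {
        // TextInputFormat supplies (byte offset, line) pairs
        for (String token : line.toString().split("\\s+")) {
            if (token.isEmpty())
                continue;
            word.set(token);
            context.write(word, ONE);   // Text/IntWritable, matching the declared map output types
        }
    }
}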

From source file:nbayes_mr.NBAYES_MR.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    // split the input CSV into five folds (data1.txt ... data5.txt)
    splitter("/home/hduser/hw4data.csv");

    // 5-fold cross-validation: for each fold i, count feature and class
    // occurrences over the training folds staged under "/input" + i,
    // then test on the held-out fold
    for (int i = 1; i <= 5; i++) {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "kmeans");
        job.setJarByClass(NBAYES_MR.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumCombiner.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path("/input" + String.valueOf(i)));
        FileOutputFormat.setOutputPath(job, new Path("/output"));
        job.waitForCompletion(true);

        FileSystem fOpen = FileSystem.get(conf);
        Path outputPathReduceFile = new Path("/output/part-r-00000");
        BufferedReader reader = new BufferedReader(new InputStreamReader(fOpen.open(outputPathReduceFile)));
        String Line = reader.readLine();
        //System.out.println(Line);
        while (Line != null) {
            String[] split = Line.split("_");
            String belongs[] = split[0].split(":");
            //System.out.println(Line);
            if (belongs[0].equalsIgnoreCase("X")) {
                probxmap.put(belongs[1], Integer.parseInt(split[1].trim()));
            } else if (belongs[0].equalsIgnoreCase("H")) {
                probhmap.put(belongs[1], Integer.parseInt(split[1].trim()));
            } else if (belongs[0].equalsIgnoreCase("X|H")) {
                //System.out.println(belongs[1]);
                probxhmap.put(belongs[1], Integer.parseInt(split[1].trim()));
            } else {
                total = Integer.parseInt(split[1].trim());
            }
            //probmap.put(split[0], Integer.parseInt(split[1]));
            Line = reader.readLine();
        }
        deleteFolder(conf, "/output");
        test("/home/hduser/data" + i + ".txt");

    }
    double avg = 0.0;
    for (int i = 0; i < accuracy.size(); i++) {
        avg += accuracy.get(i);
    }
    System.out.println("Accuracy : " + avg * 100 / 5);

}
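
Because this job never calls setMapOutputKeyClass or setMapOutputValueClass, the map output types default to the job output types (Text/Text), so the IntSumCombiner registered here must consume and emit Text values even though it is summing integer counts serialized as text.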

From source file:ngramown.Ngramown.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path inputPath = new Path(args[0]);
    Path outputPath = new Path(args[1]);
    Job job = Job.getInstance(conf, "ngram");
    job.setJarByClass(Ngramown.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:njit.cs698.wenbin.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    //job.setReducerClass(IntSumReducer.class);
    ChainReducer.setReducer(job, IntSumReducer.class, Text.class, IntWritable.class, Text.class,
            IntWritable.class, new Configuration(false));
    ChainReducer.addMapper(job, ReducerMapper.class, Text.class, IntWritable.class, IntWritable.class,
            Text.class, new Configuration(false));
    //job.setOutputKeyClass(Text.class);
    //job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
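
In this last example the combiner still runs on the raw map output (Text/IntWritable), while ChainReducer pipes the IntSumReducer's output through a ReducerMapper whose type arguments in addMapper (Text/IntWritable in, IntWritable/Text out) indicate that it swaps keys and values. ReducerMapper is not shown in the listing; a sketch consistent with those signatures:

public static class ReducerMapper extends Mapper<Text, IntWritable, IntWritable, Text> {
    @Override
    protected void map(Text word, IntWritable count, Context context)
            throws IOException, InterruptedException {
        // invert (word, count) to (count, word), matching the types
        // declared in ChainReducer.addMapper(...)
        context.write(count, word);
    }
}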