Example usage for org.apache.hadoop.mapreduce Job setCombinerClass

Introduction

On this page you can find example usages of the setCombinerClass method of org.apache.hadoop.mapreduce.Job.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException

Source Link

Document

Set the combiner class for the job.

Usage

From source file:mvm.rya.accumulo.mr.fileinput.BulkNtripsInputTool.java

License:Apache License

/**
 * Bulk-loads the input into the OSP, SPO and PO tables.
 *
 * <p>For each table one MapReduce job is submitted that writes Accumulo files to
 * {@code workDirBase/<table>/files}, partitioned by the table's current splits. After all jobs
 * have completed, the generated directories are handed to
 * {@code TableOperations.importDirectory}.
 *
 * @param args {@code args[0]} is the input directory
 * @return 0 when every job succeeded and its output was imported; 1 when at least one job
 *         failed, in which case nothing is imported
 * @throws Exception declared by the overridden method; any exception raised inside the body is
 *                   rethrown wrapped in a {@link RuntimeException}
 */
@Override
public int run(final String[] args) throws Exception {
    final Configuration conf = getConf();
    try {
        // Read the tool settings from the configuration; the current field values are the defaults.
        zk = conf.get(MRUtils.AC_ZK_PROP, zk);
        ttl = conf.get(MRUtils.AC_TTL_PROP, ttl);
        instance = conf.get(MRUtils.AC_INSTANCE_PROP, instance);
        userName = conf.get(MRUtils.AC_USERNAME_PROP, userName);
        pwd = conf.get(MRUtils.AC_PWD_PROP, pwd);
        workDirBase = conf.get(WORKDIR_PROP, workDirBase);
        format = conf.get(MRUtils.FORMAT_PROP, format);
        // Store the resolved format so the per-job copies of the configuration carry it.
        conf.set(MRUtils.FORMAT_PROP, format);
        final String inputDir = args[0];

        ZooKeeperInstance zooKeeperInstance = new ZooKeeperInstance(instance, zk);
        Connector connector = zooKeeperInstance.getConnector(userName, new PasswordToken(pwd));
        TableOperations tableOperations = connector.tableOperations();

        if (conf.get(AccumuloRdfConfiguration.CONF_ADDITIONAL_INDEXERS) != null) {
            throw new IllegalArgumentException("Cannot use Bulk N Trips tool with Additional Indexers");
        }

        String tablePrefix = conf.get(MRUtils.TABLE_PREFIX_PROPERTY, null);
        if (tablePrefix != null) {
            RdfCloudTripleStoreConstants.prefixTables(tablePrefix);
        }
        // NOTE(review): when no prefix is configured the names below start with the literal
        // text "null" - confirm whether a default prefix should be applied instead.
        String[] tables = { tablePrefix + RdfCloudTripleStoreConstants.TBL_OSP_SUFFIX,
                tablePrefix + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX,
                tablePrefix + RdfCloudTripleStoreConstants.TBL_PO_SUFFIX };

        // Submit one job per table; they run concurrently and are awaited further down.
        Collection<Job> jobs = new ArrayList<Job>();
        for (final String tableName : tables) {
            PrintStream out = null;
            try {
                String workDir = workDirBase + "/" + tableName;
                System.out.println("Loading data into table[" + tableName + "]");

                Job job = new Job(new Configuration(conf),
                        "Bulk Ingest load data to Generic RDF Table[" + tableName + "]");
                job.setJarByClass(this.getClass());
                //setting long job
                Configuration jobConf = job.getConfiguration();
                jobConf.setBoolean("mapred.map.tasks.speculative.execution", false);
                jobConf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
                // Keep a configured sort buffer size, otherwise default it to 256.
                jobConf.set("io.sort.mb", jobConf.get("io.sort.mb", "256"));
                jobConf.setBoolean("mapred.compress.map.output", true);
                //TODO: I would like LZO compression
                // jobConf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");

                job.setInputFormatClass(TextInputFormat.class);

                job.setMapperClass(ParseNtripsMapper.class);
                job.setMapOutputKeyClass(Key.class);
                job.setMapOutputValueClass(Value.class);

                // The same class serves as combiner and reducer.
                job.setCombinerClass(OutStmtMutationsReducer.class);
                job.setReducerClass(OutStmtMutationsReducer.class);
                job.setOutputFormatClass(AccumuloFileOutputFormat.class);
                // AccumuloFileOutputFormat.setZooKeeperInstance(jobConf, instance, zk);

                jobConf.set(ParseNtripsMapper.TABLE_PROPERTY, tableName);

                TextInputFormat.setInputPaths(job, new Path(inputDir));

                // Start from an empty work directory for this table.
                FileSystem fs = FileSystem.get(conf);
                Path workPath = new Path(workDir);
                if (fs.exists(workPath)) {
                    fs.delete(workPath, true);
                }

                //make failures dir
                Path failures = new Path(workDir, "failures");
                fs.delete(failures, true);
                fs.mkdirs(failures);

                AccumuloFileOutputFormat.setOutputPath(job, new Path(workDir + "/files"));

                out = new PrintStream(new BufferedOutputStream(fs.create(new Path(workDir + "/splits.txt"))));

                if (!tableOperations.exists(tableName)) {
                    tableOperations.create(tableName);
                }
                // Write the table's split points, Base64 encoded, one per line.
                Collection<Text> splits = tableOperations.getSplits(tableName, Integer.MAX_VALUE);
                for (Text split : splits) {
                    out.println(new String(Base64.encodeBase64(TextUtil.getBytes(split))));
                }

                // n split points delimit n + 1 key ranges, hence one reducer per range.
                job.setNumReduceTasks(splits.size() + 1);
                // Closed here, before the job is submitted; the finally block below only
                // matters when an exception is thrown earlier.
                out.close();

                job.setPartitionerClass(KeyRangePartitioner.class);
                RangePartitioner.setSplitFile(job, workDir + "/splits.txt");

                jobConf.set(WORKDIR_PROP, workDir);

                job.submit();
                jobs.add(job);

            } catch (Exception re) {
                throw new RuntimeException(re);
            } finally {
                if (out != null) {
                    out.close();
                }
            }
        }

        // Wait for every submitted job and remember whether any of them failed.
        boolean allSucceeded = true;
        for (Job job : jobs) {
            while (!job.isComplete()) {
                Thread.sleep(1000);
            }
            if (!job.isSuccessful()) {
                System.err.println("Job failed: " + job.getJobName());
                allSucceeded = false;
            }
        }
        if (!allSucceeded) {
            // Importing the output of a failed job would load incomplete data into the tables.
            return 1;
        }

        for (String tableName : tables) {
            String workDir = workDirBase + "/" + tableName;
            String filesDir = workDir + "/files";
            String failuresDir = workDir + "/failures";

            FileSystem fs = FileSystem.get(conf);

            //make sure that the "accumulo" user can read/write/execute into these directories
            fs.setPermission(new Path(filesDir), new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
            fs.setPermission(new Path(failuresDir), new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));

            tableOperations.importDirectory(tableName, filesDir, failuresDir, false);
        }

    } catch (InterruptedException e) {
        // Restore the interrupt flag before wrapping so callers can still observe it.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    return 0;
}

From source file:mvm.rya.joinselect.mr.FullTableSize.java

License:Apache License

/**
 * Configures and runs a MapReduce job that reads the table named by {@code SPO_TABLE} and
 * writes to the table named by {@code SELECTIVITY_TABLE}, using a single reducer.
 *
 * @param args not used
 * @return 0 if the job reports success, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {

    final Configuration settings = getConf();
    final String sourceTable = settings.get(SPO_TABLE);
    final String targetTable = settings.get(SELECTIVITY_TABLE);
    final String authorizations = settings.get(AUTHS);

    // Only enforced when the JVM runs with assertions enabled.
    assert !(sourceTable == null || targetTable == null);

    // The timestamp suffix gives every run its own job name.
    final Job tableJob = new Job(
            getConf(),
            this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    tableJob.setJarByClass(this.getClass());
    JoinSelectStatsUtil.initTableMRJob(tableJob, sourceTable, targetTable, authorizations);

    tableJob.setMapperClass(FullTableMapper.class);
    tableJob.setCombinerClass(FullTableCombiner.class);
    tableJob.setReducerClass(FullTableReducer.class);
    tableJob.setNumReduceTasks(1);

    tableJob.waitForCompletion(true);

    if (tableJob.isSuccessful()) {
        return 0;
    }
    return 1;
}

From source file:mvm.rya.joinselect.mr.JoinSelectStatisticsSum.java

License:Apache License

/**
 * Configures and runs a MapReduce job that reads from the path configured under
 * {@code INPUTPATH} and writes to the table configured under {@code SELECTIVITY_TABLE},
 * using 32 reducers.
 *
 * @param args not used
 * @return 0 if the job reports success, 1 otherwise
 */
@Override
public int run(String[] args)
        throws AccumuloSecurityException, IOException, ClassNotFoundException, InterruptedException {

    final Configuration settings = getConf();
    final String targetTable = settings.get(SELECTIVITY_TABLE);
    final String authorizations = settings.get(AUTHS);
    final String sourcePath = settings.get(INPUTPATH);

    // Only enforced when the JVM runs with assertions enabled.
    assert !(targetTable == null || sourcePath == null);

    // The timestamp suffix gives every run its own job name.
    final Job sumJob = new Job(
            getConf(),
            this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    sumJob.setJarByClass(this.getClass());
    JoinSelectStatsUtil.initSumMRJob(sumJob, sourcePath, targetTable, authorizations);

    sumJob.setMapperClass(CardinalityIdentityMapper.class);
    sumJob.setCombinerClass(CardinalityIdentityCombiner.class);
    sumJob.setReducerClass(CardinalityIdentityReducer.class);
    sumJob.setNumReduceTasks(32);

    sumJob.waitForCompletion(true);

    if (sumJob.isSuccessful()) {
        return 0;
    }
    return 1;
}

From source file:mx.iteso.msc.asn.mrwordcount.MyDriver.java

License:Apache License

/**
 * Configures and runs the "WordCounter" job, using {@code MyReducer} both as combiner and as
 * reducer, then exits the JVM with status 0 on success or 1 on failure.
 *
 * <p>When fewer than two arguments are supplied, a usage message is printed and the JVM exits
 * with status 2.
 *
 * @param args the command line arguments: {@code args[0]} is the input path and
 *             {@code args[1]} is the output path
 * @throws IOException if the job cannot be set up or submitted
 * @throws InterruptedException if the thread is interrupted while waiting for the job
 * @throws ClassNotFoundException if a job class cannot be found
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException further down.
    if (args.length < 2) {
        System.err.println("Usage: MyDriver <in> <out>");
        System.exit(2);
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "WordCounter");
    job.setJarByClass(MyDriver.class);
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:myGrep.Grep.java

License:Apache License

/**
 * Runs the "wzl-grep-search" job: input is mapped with {@code myRegMapper}, combined and
 * reduced with {@code LongSumReducer}, and written as a sequence file of
 * {@code Text}/{@code LongWritable} pairs.
 *
 * @param args {@code <inDir> <outDir> <regex> [<group>]}
 * @return 2 when fewer than three arguments are given, 0 when the job succeeds, 1 when it fails
 * @throws Exception if job setup or execution fails
 */
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    System.out.println("0:" + args[0] + " 1:" + args[1] + " 2:" + args[2]);

    // Only the disabled sort stage below would write here; the path is still cleaned up in
    // the finally block.
    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4) {
        conf.set(RegexMapper.GROUP, args[3]);
    }

    Job grepJob = Job.getInstance(conf);

    boolean succeeded;
    try {
        init_log();

        grepJob.setJobName("wzl-grep-search");
        grepJob.setJarByClass(Grep.class);

        FileInputFormat.setInputPaths(grepJob, args[0]);

        //grepJob.setMapperClass(RegexMapper.class);
        grepJob.setMapperClass(myRegMapper.class);

        //(string, int) -> sub int -> (string , sum)
        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);

        //FileOutputFormat.setOutputPath(grepJob, tempDir);
        FileOutputFormat.setOutputPath(grepJob, new Path(args[1]));
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);

        // Keep the outcome so a failed job is reported through the return code.
        succeeded = grepJob.waitForCompletion(true);

        /*
        Disabled second stage: sort the grep output by decreasing frequency.

        Job sortJob = Job.getInstance(conf);
        sortJob.setJobName("wzl-grep-sort");
        sortJob.setJarByClass(Grep.class);

        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);

        sortJob.setMapperClass(InverseMapper.class);

        sortJob.setNumReduceTasks(1);                 // write a single file
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        sortJob.setSortComparatorClass(          // sort by decreasing freq
          LongWritable.DecreasingComparator.class);

        sortJob.waitForCompletion(true);
        */
    } finally {
        FileSystem.get(conf).delete(tempDir, true);
    }
    return succeeded ? 0 : 1;
}

From source file:mylab0.WordCount.java

License:Apache License

/**
 * Entry point of the "word count" job: expects an input and an output path after the generic
 * Hadoop options, runs the job with a single reducer and exits with 0 on success, 1 on failure
 * (2 on wrong usage).
 *
 * @param args command line arguments, generic Hadoop options included
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    final String[] paths = new GenericOptionsParser(configuration, args).getRemainingArgs();
    if (paths.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    final Job wordCount = new Job(configuration, "word count");
    wordCount.setJarByClass(WordCount.class);

    // IntSumReducer is used both as combiner and as reducer.
    wordCount.setMapperClass(TokenizerMapper.class);
    wordCount.setCombinerClass(IntSumReducer.class);
    wordCount.setReducerClass(IntSumReducer.class);

    wordCount.setOutputKeyClass(Text.class);
    wordCount.setOutputValueClass(IntWritable.class);
    wordCount.setNumReduceTasks(1);

    FileInputFormat.addInputPath(wordCount, new Path(paths[0]));
    FileOutputFormat.setOutputPath(wordCount, new Path(paths[1]));

    final boolean succeeded = wordCount.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:mylab0.WordCountMultipleInputs.java

License:Apache License

/**
 * Entry point of the "word count multiple inputs" job: the first input path is read with
 * {@code Mapper1}, the second with {@code Mapper2}, and the results are summed by
 * {@code IntSumReducer} in a single reducer. Exits with 0 on success, 1 on failure and 2 on
 * wrong usage.
 *
 * @param args command line arguments, generic Hadoop options included
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    final String[] paths = new GenericOptionsParser(configuration, args).getRemainingArgs();
    if (paths.length != 3) {
        System.err.println("Usage: wordcountmultipleinputs <input1> <input2> <out>");
        System.exit(2);
    }

    final Job countJob = new Job(configuration, "word count multiple inputs");
    countJob.setJarByClass(WordCountMultipleInputs.class);

    // Each input path gets its own mapper; both are read as plain text.
    final Path firstInput = new Path(paths[0]);
    MultipleInputs.addInputPath(countJob, firstInput, TextInputFormat.class, Mapper1.class);
    final Path secondInput = new Path(paths[1]);
    MultipleInputs.addInputPath(countJob, secondInput, TextInputFormat.class, Mapper2.class);

    countJob.setCombinerClass(IntSumReducer.class);
    countJob.setReducerClass(IntSumReducer.class);

    countJob.setMapOutputKeyClass(Text.class);
    countJob.setMapOutputValueClass(IntWritable.class);
    countJob.setOutputKeyClass(Text.class);
    countJob.setOutputValueClass(IntWritable.class);
    countJob.setNumReduceTasks(1);

    FileOutputFormat.setOutputPath(countJob, new Path(paths[2]));

    final boolean succeeded = countJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:nbayes_mr.NBAYES_MR.java

/**
 * Runs five rounds. Each round executes a MapReduce job over {@code /input<i>}, loads the
 * counts from {@code /output/part-r-00000} into the probability maps, deletes
 * {@code /output} and calls {@code test} on {@code /home/hduser/data<i>.txt}. Finally the
 * values collected in {@code accuracy} are averaged and printed as a percentage.
 *
 * <p>Each output line is expected to look like {@code <kind>:<name>_<count>}, where kind is
 * {@code X}, {@code H} or {@code X|H}; the count on any other line is stored as the total.
 *
 * @param args not used
 * @throws IOException if a job fails or its output cannot be read
 * @throws InterruptedException if the thread is interrupted while waiting for a job
 * @throws ClassNotFoundException if a job class cannot be found
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    // Number of rounds; also the divisor of the final average.
    final int rounds = 5;

    splitter("/home/hduser/hw4data.csv");

    for (int i = 1; i <= rounds; i++) {
        // NOTE(review): /input<i> must already exist in the file system - presumably it holds
        // the training data of round i; confirm against splitter().
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "kmeans");
        job.setJarByClass(NBAYES_MR.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumCombiner.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path("/input" + String.valueOf(i)));
        FileOutputFormat.setOutputPath(job, new Path("/output"));
        // Stop with a clear message rather than failing later on a missing output file.
        if (!job.waitForCompletion(true)) {
            throw new IOException("MapReduce job failed for input /input" + i);
        }

        FileSystem fOpen = FileSystem.get(conf);
        Path outputPathReduceFile = new Path("/output/part-r-00000");
        // Decode explicitly as UTF-8 instead of relying on the platform default charset.
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(fOpen.open(outputPathReduceFile), "UTF-8"));
        try {
            String line = reader.readLine();
            while (line != null) {
                String[] split = line.split("_");
                String[] belongs = split[0].split(":");
                if (belongs[0].equalsIgnoreCase("X")) {
                    probxmap.put(belongs[1], Integer.parseInt(split[1].trim()));
                } else if (belongs[0].equalsIgnoreCase("H")) {
                    probhmap.put(belongs[1], Integer.parseInt(split[1].trim()));
                } else if (belongs[0].equalsIgnoreCase("X|H")) {
                    probxhmap.put(belongs[1], Integer.parseInt(split[1].trim()));
                } else {
                    total = Integer.parseInt(split[1].trim());
                }
                line = reader.readLine();
            }
        } finally {
            // Release the underlying stream even when parsing fails.
            reader.close();
        }
        deleteFolder(conf, "/output");
        test("/home/hduser/data" + i + ".txt");
    }

    double avg = 0.0;
    for (int i = 0; i < accuracy.size(); i++) {
        avg += accuracy.get(i);
    }
    System.out.println("Accuracy : " + avg * 100 / rounds);
}

From source file:ngramown.Ngramown.java

/**
 * Entry point of the "ngram" job: reads text from {@code args[0]}, writes text to
 * {@code args[1]} and exits with 0 on success or 1 on failure.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    final Path source = new Path(args[0]);
    final Path destination = new Path(args[1]);

    final Job ngramJob = Job.getInstance(configuration, "ngram");
    ngramJob.setJarByClass(Ngramown.class);

    // IntSumReducer is used both as combiner and as reducer.
    ngramJob.setMapperClass(TokenizerMapper.class);
    ngramJob.setCombinerClass(IntSumReducer.class);
    ngramJob.setReducerClass(IntSumReducer.class);

    ngramJob.setOutputKeyClass(Text.class);
    ngramJob.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(ngramJob, source);
    FileOutputFormat.setOutputPath(ngramJob, destination);
    ngramJob.setInputFormatClass(TextInputFormat.class);
    ngramJob.setOutputFormatClass(TextOutputFormat.class);

    final boolean succeeded = ngramJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:njit.cs698.wenbin.WordCount.java

License:Apache License

/**
 * Entry point of the "word count" job with a chained reduce phase: {@code IntSumReducer} is
 * followed by {@code ReducerMapper}, both registered through {@code ChainReducer}. All
 * arguments but the last are input paths; the last one is the output path. Exits with 0 on
 * success, 1 on failure and 2 on wrong usage.
 *
 * @param args command line arguments, generic Hadoop options included
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    final String[] paths = new GenericOptionsParser(configuration, args).getRemainingArgs();
    if (paths.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }

    final Job chainedCount = Job.getInstance(configuration, "word count");
    chainedCount.setJarByClass(WordCount.class);
    chainedCount.setMapperClass(TokenizerMapper.class);
    chainedCount.setCombinerClass(IntSumReducer.class);
    chainedCount.setMapOutputKeyClass(Text.class);
    chainedCount.setMapOutputValueClass(IntWritable.class);

    // The reducer is registered through ChainReducer, so setReducerClass and the job's output
    // key/value classes are not set directly here.
    ChainReducer.setReducer(chainedCount, IntSumReducer.class, Text.class, IntWritable.class, Text.class,
            IntWritable.class, new Configuration(false));
    ChainReducer.addMapper(chainedCount, ReducerMapper.class, Text.class, IntWritable.class,
            IntWritable.class, Text.class, new Configuration(false));

    // Every argument except the last one is an input path.
    final int outputIndex = paths.length - 1;
    for (int index = 0; index < outputIndex; index++) {
        FileInputFormat.addInputPath(chainedCount, new Path(paths[index]));
    }
    FileOutputFormat.setOutputPath(chainedCount, new Path(paths[outputIndex]));

    final boolean succeeded = chainedCount.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}