Example usage for org.apache.hadoop.mapreduce Job setPartitionerClass

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce.Job#setPartitionerClass.

Prototype

public void setPartitionerClass(Class<? extends Partitioner> cls) throws IllegalStateException 

Document

Set the Partitioner for the job.
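
Before the full examples, here is a minimal, self-contained sketch (not taken from the listings below) of how setPartitionerClass is typically wired into a job. FirstCharPartitioner, TokenMapper, and the command-line input/output paths are hypothetical names used only for illustration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class PartitionerExample {

    // Hypothetical partitioner: route keys by their first character so that
    // keys starting with the same character always reach the same reducer.
    public static class FirstCharPartitioner extends Partitioner<Text, IntWritable> {
        @Override
        public int getPartition(Text key, IntWritable value, int numPartitions) {
            if (key.getLength() == 0) {
                return 0;
            }
            // Mask with Integer.MAX_VALUE to keep the index non-negative.
            return (key.charAt(0) & Integer.MAX_VALUE) % numPartitions;
        }
    }

    // Hypothetical mapper emitting (token, 1) pairs.
    public static class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String token : value.toString().split("\\s+")) {
                if (!token.isEmpty()) {
                    word.set(token);
                    context.write(word, ONE);
                }
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "partitioner example");
        job.setJarByClass(PartitionerExample.class);
        job.setMapperClass(TokenMapper.class);
        job.setReducerClass(IntSumReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setNumReduceTasks(4);
        // Must be called before the job is submitted; afterwards it throws
        // IllegalStateException.
        job.setPartitionerClass(FirstCharPartitioner.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Note that getPartition must return an index in [0, numPartitions); a negative or out-of-range index fails the map task at runtime.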

Usage

From source file:gr.ntua.h2rdf.loadTriples.SortIds.java

License:Apache License

public static Job createSubmittableJob(String[] args, Counters counters, int numReducers) throws IOException {
    //numReducers=52;
    Job job = new Job();
    TABLE_NAME = args[1];
    Configuration conf = job.getConfiguration();
    long sum = 0, maxCommon = Integer.MAX_VALUE;
    try {
        HTable table = new HTable(HBaseConfiguration.create(), "Counters");

        for (int i = 1; i < numReducers; i++) {
            Get get = new Get(Bytes.toBytes("count." + i));
            get.addColumn(Bytes.toBytes("counter"), new byte[0]);
            Result res = table.get(get);
            if (!res.isEmpty()) {
                long v = Bytes.toLong(res.raw()[0].getValue());
                //long v = counters.findCounter("Countergroup", "count."+i).getValue();
                if (v < maxCommon) {
                    maxCommon = v;
                }
                //conf.setLong("count."+i, v);
                //System.out.println(v);
                sum += v;
            }
        }
        System.out.println("maxCommon: " + maxCommon);
        job.getConfiguration().setLong("count.MaxCommon", maxCommon);
        job.getConfiguration().setInt("count.numReducers", numReducers - 1);
        job.getConfiguration().setInt("count.sum", (int) sum);

        Get get = new Get(Bytes.toBytes("count.chunks"));
        get.addColumn(Bytes.toBytes("counter"), new byte[0]);
        Result res = table.get(get);
        int stringReducers = 0;
        if (!res.isEmpty()) {
            stringReducers = (int) Bytes.toLong(res.raw()[0].getValue());
        }
        //int stringReducers = (int) counters.findCounter("Countergroup", "count.chunks").getValue();
        int intReducers = (int) Math.ceil((double) sum / (double) bucket);
        sum = maxCommon * (numReducers - 1);
        for (int i = 1; i < numReducers; i++) {
            get = new Get(Bytes.toBytes("count." + i));
            get.addColumn(Bytes.toBytes("counter"), new byte[0]);
            res = table.get(get);
            if (!res.isEmpty()) {
                long v = Bytes.toLong(res.raw()[0].getValue());
                //long v = counters.findCounter("Countergroup", "count."+i).getValue();
                job.getConfiguration().setLong("count." + (i - 1), sum);
                //System.out.println("count."+i+" "+sum);
                sum += v - maxCommon;
            }

        }

        System.out
                .println("stringReducers: " + stringReducers + " sum: " + sum + " intReducers: " + intReducers);

        job.getConfiguration().setInt("count.stringReducers", stringReducers);
        job.getConfiguration().setInt("count.intReducers", intReducers);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setNumReduceTasks(stringReducers + intReducers);

        Path out = new Path(args[1]);
        FileSystem fs;
        try {
            fs = FileSystem.get(conf);
            if (fs.exists(out)) {
                fs.delete(out, true);
            }
            if (fs.exists(new Path("temp")))
                fs.delete(new Path("temp"), true);
        } catch (IOException e) {
            e.printStackTrace();
        }
        FileOutputFormat.setOutputPath(job, out);
        FileInputFormat.addInputPath(job, new Path("uniqueIds"));
        FileInputFormat.addInputPath(job, new Path("blockIds"));

        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setOutputFormatClass(HFileOutputFormat.class);
        job.setJarByClass(SortIds.class);

        //configure compression
        StringBuilder compressionConfigValue = new StringBuilder();
        compressionConfigValue.append(URLEncoder.encode("1", "UTF-8"));
        compressionConfigValue.append('=');
        compressionConfigValue.append(URLEncoder.encode(Algorithm.GZ.getName(), "UTF-8"));
        compressionConfigValue.append('&');
        compressionConfigValue.append(URLEncoder.encode("2", "UTF-8"));
        compressionConfigValue.append('=');
        compressionConfigValue.append(URLEncoder.encode(Algorithm.GZ.getName(), "UTF-8"));
        job.getConfiguration().set("hbase.hfileoutputformat.families.compression",
                compressionConfigValue.toString());

        job.getConfiguration().set("mapred.compress.map.output", "true");
        job.getConfiguration().set("mapred.map.output.compression.codec",
                "org.apache.hadoop.io.compress.SnappyCodec");

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setPartitionerClass(TwoTotalOrderPartitioner.class);
        TwoTotalOrderPartitioner.setPartitionFile(job.getConfiguration(),
                new Path("partition/stringIdPartition"));
        //job.setCombinerClass(Combiner.class);
        job.setJobName("SortIds");
        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 268435456);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);

    } catch (IOException e) {
        e.printStackTrace();
    }
    return job;
}

From source file:gr.ntua.h2rdf.loadTriples.Translate.java

License:Apache License

public static Job createSubmittableJob(String[] args) throws IOException {

    Job job = new Job();

    Configuration conf = job.getConfiguration();
    FileSystem fs;
    int reducers = 0;
    try {
        fs = FileSystem.get(conf);
        FileStatus[] p = fs.listStatus(new Path("blockIds/"));
        reducers = p.length;
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setNumReduceTasks(reducers);

        Path out = new Path("translations");
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
        FileOutputFormat.setOutputPath(job, out);
        FileInputFormat.addInputPath(job, new Path("temp"));

        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);

        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(ImmutableBytesWritable.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setJarByClass(Translate.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setPartitionerClass(IdPartitioner.class);

        job.setJobName("Translate");
        job.getConfiguration().set("mapred.compress.map.output", "true");
        job.getConfiguration().set("mapred.map.output.compression.codec",
                "org.apache.hadoop.io.compress.SnappyCodec");
        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);

    } catch (IOException e) {
        e.printStackTrace();
    }
    return job;
}

From source file:gr.ntua.h2rdf.loadTriples.TranslateAndImport.java

License:Apache License

public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {
    //compute sample partitions
    FileSystem fs;
    Configuration conf = new Configuration();
    int collected = 0, chunks = 0;
    try {
        fs = FileSystem.get(conf);
        Path sampleDir = new Path("sample");
        FileStatus[] samples = fs.listStatus(sampleDir);
        TreeSet<String> set = new TreeSet<String>();
        for (FileStatus sample : samples) {
            FSDataInputStream in = fs.open(sample.getPath());
            CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
            CompressionInputStream in1 = codec.createInputStream(in);
            NxParser nxp = new NxParser(in1);
            Iterator<Node[]> it = nxp.iterator();
            while (it.hasNext()) {
                Node[] tr = it.next();
                //System.out.println(tr[0].toN3());
                set.add(tr[0].toN3());
                set.add(tr[1].toN3());
                set.add(tr[2].toN3());
            }
            in1.close();
            in.close();
        }

        IndexTranslator translator = new IndexTranslator(TABLE_NAME + "_Index");
        HashMap<String, Long> index = translator.translate(set);
        set.clear();
        TreeSet<ImmutableBytesWritable> set1 = new TreeSet<ImmutableBytesWritable>(
                new ImmutableBytesWritable.Comparator());

        for (FileStatus sample : samples) {
            FSDataInputStream in = fs.open(sample.getPath());
            CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
            CompressionInputStream in1 = codec.createInputStream(in);
            NxParser nxp = new NxParser(in1);
            Iterator<Node[]> it = nxp.iterator();
            while (it.hasNext()) {
                Node[] tr = it.next();
                ByteTriple btr = new ByteTriple(index.get(tr[0].toN3()), index.get(tr[1].toN3()),
                        index.get(tr[2].toN3()));
                set1.add(new ImmutableBytesWritable(btr.getSPOByte()));
                set1.add(new ImmutableBytesWritable(btr.getSOPByte()));
                set1.add(new ImmutableBytesWritable(btr.getOPSByte()));
                set1.add(new ImmutableBytesWritable(btr.getOSPByte()));
                set1.add(new ImmutableBytesWritable(btr.getPOSByte()));
                set1.add(new ImmutableBytesWritable(btr.getPSOByte()));
            }
            in1.close();
            in.close();
        }
        index.clear();

        Path p = new Path("hexastorePartition");
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
        SequenceFile.Writer partitionWriter = SequenceFile.createWriter(fs, conf, p,
                ImmutableBytesWritable.class, NullWritable.class);

        double chunkSize = bucketSampledTriples * DistinctIds.samplingRate;
        System.out.println("chunkSize: " + chunkSize);
        Iterator<ImmutableBytesWritable> it = set1.iterator();
        while (it.hasNext()) {
            ImmutableBytesWritable key = it.next();
            if (collected > chunkSize) {
                partitionWriter.append(key, NullWritable.get());
                //System.out.println(Bytes.toStringBinary(key.get()));
                collected = 0;
                chunks++;
            } else {
                collected++;
            }
        }
        System.out.println("chunks: " + chunks);
        partitionWriter.close();

    } catch (IOException e) {
        e.printStackTrace();
    }

    Job job = new Job(conf, "Import Hexastore");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    Path out = new Path("out");
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(job, out);

    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("hexastorePartition"));
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HFileOutputFormat.class);

    StringBuilder compressionConfigValue = new StringBuilder();
    compressionConfigValue.append(URLEncoder.encode("I", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    compressionConfigValue.append('&');
    compressionConfigValue.append(URLEncoder.encode("S", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    compressionConfigValue.append('&');
    compressionConfigValue.append(URLEncoder.encode("T", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    job.getConfiguration().set("hbase.hfileoutputformat.families.compression",
            compressionConfigValue.toString());
    //job.getConfiguration().setInt("hbase.mapreduce.hfileoutputformat.blocksize",262144);
    //job.getConfiguration().setInt("hbase.mapreduce.hfileoutputformat.blocksize",16384);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setJarByClass(TranslateAndImport.class);
    job.setMapperClass(Map.class);
    //job.setReducerClass(HexaStoreHistogramsReduce.class);
    job.setReducerClass(HexaStoreReduce.class);

    job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
    job.getConfiguration().setInt("mapred.reduce.tasks", chunks + 1);
    //job.setCombinerClass(Combiner.class);
    job.setJobName("Translate Projections");
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().setInt("io.sort.mb", 100);
    job.getConfiguration().setInt("io.file.buffer.size", 131072);
    job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);

    job.getConfiguration().set("mapred.compress.map.output", "true");
    job.getConfiguration().set("mapred.map.output.compression.codec",
            "org.apache.hadoop.io.compress.SnappyCodec");
    //job.getConfiguration().setInt("hbase.hregion.max.filesize", 268435456);
    //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
    job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);

    return job;

}

From source file:gr.ntua.h2rdf.sampler.TotalOrderPrep.java

License:Open Source License

public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {

    Job sample_job = new Job();

    // Remember the real input format so the sampling input format can use
    // it under the hood

    sample_job.getConfiguration().setBoolean(ARG_INPUTFORMAT, true);
    sample_job.setInputFormatClass(TextInputFormat.class);

    //sample_job.getConfiguration().set("mapred.fairscheduler.pool", "pool9");
    // Base the sample size on the number of reduce tasks that will be used
    // by the real job, but only use 1 reducer for this job (maps output very
    // little)
    sample_job.setNumReduceTasks(1);

    // Make this job's output a temporary file -- the input file for the
    // real job's TotalOrderPartitioner
    Path partition = new Path("partitions/");
    //partition.getFileSystem(job.getConfiguration()).deleteOnExit(partition);

    conf = new Configuration();
    FileSystem fs;
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(partition)) {
            fs.delete(partition, true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(sample_job, partition);
    FileInputFormat.setInputPaths(sample_job, new Path(args[0]));
    //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path(partition, "part-r-00000"));
    //job.setPartitionerClass(TotalOrderPartitioner.class);

    // If there's a combiner, turn it into an identity reducer to prevent
    // destruction of keys.

    sample_job.setCombinerClass(Combiner.class);

    sample_job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    sample_job.setMapOutputValueClass(ImmutableBytesWritable.class);
    sample_job.setOutputKeyClass(ImmutableBytesWritable.class);
    sample_job.setOutputValueClass(NullWritable.class);
    sample_job.setPartitionerClass(HashPartitioner.class);
    sample_job.setOutputFormatClass(SequenceFileOutputFormat.class);
    sample_job.setJarByClass(TotalOrderPrep.class);
    sample_job.setMapperClass(Map.class);
    sample_job.setReducerClass(PartitioningReducer.class);
    sample_job.setJobName("(Sampler)");
    sample_job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    sample_job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    return sample_job;

}

From source file:hadoop.SleepJob.java

License:Apache License

public Job createJob(int numMapper, int numReducer, long mapSleepTime, int mapSleepCount, long reduceSleepTime,
        int reduceSleepCount) throws IOException {
    Configuration conf = getConf();
    conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
    conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
    conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
    conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
    conf.setInt(MRJobConfig.NUM_MAPS, numMapper);
    Job job = new Job(conf, "sleep");
    job.setNumReduceTasks(numReducer);
    job.setJarByClass(SleepJob.class);
    job.setMapperClass(SleepMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(SleepReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setInputFormatClass(SleepInputFormat.class);
    job.setPartitionerClass(SleepJobPartitioner.class);
    job.setSpeculativeExecution(false);
    job.setJobName("Sleep job");
    FileInputFormat.addInputPath(job, new Path("ignored"));
    return job;
}

From source file:hadoop.wordcount.partitioner.combiner.WordCountAddPartitioner.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setNumReduceTasks(5);
    job.setJarByClass(WordCountAddPartitioner.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setPartitionerClass(MyPartitioner.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:hadoop.wordcount.partitioner.nocombiner.WordCountAddPartitioner.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setNumReduceTasks(5);
    job.setJarByClass(WordCountAddPartitioner.class);
    job.setMapperClass(TokenizerMapper.class);
    //      job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setPartitionerClass(MyPartitioner.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:hadoop.wordcount.partitioner.permap.WordCountAddPartitioner.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setNumReduceTasks(5);
    job.setJarByClass(WordCountAddPartitioner.class);
    job.setMapperClass(TokenizerMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setPartitionerClass(MyPartitioner.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:hamr.core.general.job.GeneralJob.java

License:Open Source License

public static void generalization(Class<? extends AnnotedBean> abClass, Job job) {
    job.setMapperClass(GeneralMapper.class);
    job.setPartitionerClass(GeneralPartitioner.class);
    job.setMapOutputKeyClass(abClass);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(GeneralReducer.class);
    job.setGroupingComparatorClass(GeneralGroupComparator.class);
}

From source file:hk.newsRecommender.TFIDF.java

License:Open Source License

public static void main(String[] args) throws Exception {
    // part0----------------------------------------------------
    Configuration conf0 = new Configuration();
    // DF()
    // FileSystem hdfs = FileSystem.get(conf1);
    // FileStatus p[] = hdfs.listStatus(new Path(args[0]));
    String hdfsUrl0 = conf0.get("fs.defaultFS");

    // optionally set NumReduceTasks to the number of input files
    Job job0 = Job.getInstance(conf0, "My_tdif_part0");
    job0.setJarByClass(TFIDF.class);
    job0.setMapperClass(Mapper_Part0.class);
    // job1.setCombinerClass(Combiner_Part1.class); // optional combiner
    job0.setReducerClass(Reduce_Part0.class);
    job0.setMapOutputKeyClass(Text.class);
    job0.setMapOutputValueClass(Text.class);
    job0.setOutputKeyClass(Text.class);
    job0.setOutputValueClass(Text.class);
    // job1.setNumReduceTasks(p.length);

    FileInputFormat.addInputPath(job0, new Path(hdfsUrl0 + "/data/recommend/data2.txt"));
    FileOutputFormat.setOutputPath(job0, new Path(hdfsUrl0 + "/data/recommend/tfidf0"));

    job0.waitForCompletion(true);

    // part1----------------------------------------------------
    Configuration conf1 = new Configuration();
    // DF()
    // FileSystem hdfs = FileSystem.get(conf1);
    // FileStatus p[] = hdfs.listStatus(new Path(args[0]));
    String hdfsUrl = conf1.get("fs.defaultFS");

    // optionally set NumReduceTasks to the number of input files
    Job job1 = Job.getInstance(conf1, "My_tdif_part1");
    job1.setJarByClass(TFIDF.class);
    job1.setMapperClass(Mapper_Part1.class);
    // job1.setCombinerClass(Combiner_Part1.class); // optional combiner
    job1.setReducerClass(Reduce_Part1.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    // job1.setNumReduceTasks(p.length);
    job1.setPartitionerClass(MyPartitoner.class); // use the custom MyPartitoner

    FileInputFormat.addInputPath(job1, new Path(hdfsUrl + "/data/recommend/tfidf0"));
    FileOutputFormat.setOutputPath(job1, new Path(hdfsUrl + "/data/recommend/tfidf1"));

    job1.waitForCompletion(true);
    // part2----------------------------------------
    Configuration conf2 = new Configuration();

    Job job2 = Job.getInstance(conf2, "My_tdif_part2");
    job2.setJarByClass(TFIDF.class);
    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(Text.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
    job2.setMapperClass(Mapper_Part2.class);
    job2.setReducerClass(Reduce_Part2.class);
    // job2.setNumReduceTasks(p.length);

    FileInputFormat.setInputPaths(job2, new Path(hdfsUrl + "/data/recommend/tfidf1"));
    FileOutputFormat.setOutputPath(job2, new Path(hdfsUrl + "/data/recommend/tfidf2"));

    job2.waitForCompletion(true);

    //      part3----------------------------------------
    //      Configuration conf3 = new Configuration();
    //      
    //      Job job3 = Job.getInstance(conf3, "My_tdif_part3");
    //      job3.setJarByClass(TFIDF.class);
    //      job3.setMapOutputKeyClass(Text.class);
    //      job3.setMapOutputValueClass(Text.class);
    //      job3.setOutputKeyClass(Text.class);
    //      job3.setOutputValueClass(Text.class);
    //      job3.setMapperClass(Mapper_Part3.class);
    //      job3.setReducerClass(Reduce_Part3.class);
    //      // job2.setNumReduceTasks(p.length);
    //      
    //      FileInputFormat.setInputPaths(job3, new Path(hdfsUrl + "/data/recommend/tfidf2"));
    //      FileOutputFormat.setOutputPath(job3, new Path(hdfsUrl + "/data/recommend/tfidf3"));
    //      
    //      job3.waitForCompletion(true);
    //      hdfs.delete(new Path(args[1]), true);
}