Example usage for org.apache.hadoop.mapreduce Job setPartitionerClass

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce.Job#setPartitionerClass.

Prototype

public void setPartitionerClass(Class<? extends Partitioner> cls) throws IllegalStateException 

Document

Set the Partitioner for the job.
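
Before the full examples, here is a minimal, self-contained sketch (not taken from the listings below) of how setPartitionerClass is typically wired into a job. FirstCharPartitioner, TokenMapper, and the command-line input/output paths are hypothetical names used only for illustration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class PartitionerExample {

    // Hypothetical partitioner: route keys by their first character so that
    // keys starting with the same character always reach the same reducer.
    public static class FirstCharPartitioner extends Partitioner<Text, IntWritable> {
        @Override
        public int getPartition(Text key, IntWritable value, int numPartitions) {
            if (key.getLength() == 0) {
                return 0;
            }
            // Mask with Integer.MAX_VALUE to keep the index non-negative.
            return (key.charAt(0) & Integer.MAX_VALUE) % numPartitions;
        }
    }

    // Hypothetical mapper emitting (token, 1) pairs.
    public static class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String token : value.toString().split("\\s+")) {
                if (!token.isEmpty()) {
                    word.set(token);
                    context.write(word, ONE);
                }
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "partitioner example");
        job.setJarByClass(PartitionerExample.class);
        job.setMapperClass(TokenMapper.class);
        job.setReducerClass(IntSumReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setNumReduceTasks(4);
        // Must be called before the job is submitted; afterwards it throws
        // IllegalStateException.
        job.setPartitionerClass(FirstCharPartitioner.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Note that getPartition must return an index in [0, numPartitions); a negative or out-of-range index fails the map task at runtime.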

Usage

From source file:gr.ntua.h2rdf.loadTriples.SortIds.java

License:Apache License

public static Job createSubmittableJob(String[] args, Counters counters, int numReducers) throws IOException {
    //numReducers=52;
    Job job = new Job();
    TABLE_NAME = args[1];
    Configuration conf = job.getConfiguration();
    long sum = 0, maxCommon = Integer.MAX_VALUE;
    try {
        HTable table = new HTable(HBaseConfiguration.create(), "Counters");

        for (int i = 1; i < numReducers; i++) {
            Get get = new Get(Bytes.toBytes("count." + i));
            get.addColumn(Bytes.toBytes("counter"), new byte[0]);
            Result res = table.get(get);
            if (!res.isEmpty()) {
                long v = Bytes.toLong(res.raw()[0].getValue());
                //long v = counters.findCounter("Countergroup", "count."+i).getValue();
                if (v < maxCommon) {
                    maxCommon = v;
                }
                //conf.setLong("count."+i, v);
                //System.out.println(v);
                sum += v;
            }
        }
        System.out.println("maxCommon: " + maxCommon);
        job.getConfiguration().setLong("count.MaxCommon", maxCommon);
        job.getConfiguration().setInt("count.numReducers", numReducers - 1);
        job.getConfiguration().setInt("count.sum", (int) sum);

        Get get = new Get(Bytes.toBytes("count.chunks"));
        get.addColumn(Bytes.toBytes("counter"), new byte[0]);
        Result res = table.get(get);
        int stringReducers = 0;
        if (!res.isEmpty()) {
            stringReducers = (int) Bytes.toLong(res.raw()[0].getValue());
        }
        //int stringReducers = (int) counters.findCounter("Countergroup", "count.chunks").getValue();
        int intReducers = (int) Math.ceil((double) sum / (double) bucket);
        sum = maxCommon * (numReducers - 1);
        for (int i = 1; i < numReducers; i++) {
            get = new Get(Bytes.toBytes("count." + i));
            get.addColumn(Bytes.toBytes("counter"), new byte[0]);
            res = table.get(get);
            if (!res.isEmpty()) {
                long v = Bytes.toLong(res.raw()[0].getValue());
                //long v = counters.findCounter("Countergroup", "count."+i).getValue();
                job.getConfiguration().setLong("count." + (i - 1), sum);
                //System.out.println("count."+i+" "+sum);
                sum += v - maxCommon;
            }

        }

        System.out
                .println("stringReducers: " + stringReducers + " sum: " + sum + " intReducers: " + intReducers);

        job.getConfiguration().setInt("count.stringReducers", stringReducers);
        job.getConfiguration().setInt("count.intReducers", intReducers);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setNumReduceTasks(stringReducers + intReducers);

        Path out = new Path(args[1]);
        FileSystem fs;
        try {
            fs = FileSystem.get(conf);
            if (fs.exists(out)) {
                fs.delete(out, true);
            }
            if (fs.exists(new Path("temp")))
                fs.delete(new Path("temp"), true);
        } catch (IOException e) {
            e.printStackTrace();
        }
        FileOutputFormat.setOutputPath(job, out);
        FileInputFormat.addInputPath(job, new Path("uniqueIds"));
        FileInputFormat.addInputPath(job, new Path("blockIds"));

        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setOutputFormatClass(HFileOutputFormat.class);
        job.setJarByClass(SortIds.class);

        //configure compression
        StringBuilder compressionConfigValue = new StringBuilder();
        compressionConfigValue.append(URLEncoder.encode("1", "UTF-8"));
        compressionConfigValue.append('=');
        compressionConfigValue.append(URLEncoder.encode(Algorithm.GZ.getName(), "UTF-8"));
        compressionConfigValue.append('&');
        compressionConfigValue.append(URLEncoder.encode("2", "UTF-8"));
        compressionConfigValue.append('=');
        compressionConfigValue.append(URLEncoder.encode(Algorithm.GZ.getName(), "UTF-8"));
        job.getConfiguration().set("hbase.hfileoutputformat.families.compression",
                compressionConfigValue.toString());

        job.getConfiguration().set("mapred.compress.map.output", "true");
        job.getConfiguration().set("mapred.map.output.compression.codec",
                "org.apache.hadoop.io.compress.SnappyCodec");

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setPartitionerClass(TwoTotalOrderPartitioner.class);
        TwoTotalOrderPartitioner.setPartitionFile(job.getConfiguration(),
                new Path("partition/stringIdPartition"));
        //job.setCombinerClass(Combiner.class);
        job.setJobName("SortIds");
        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 268435456);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);

    } catch (IOException e) {
        e.printStackTrace();
    }
    return job;
}

From source file:gr.ntua.h2rdf.loadTriples.Translate.java

License:Apache License

public static Job createSubmittableJob(String[] args) throws IOException {

    Job job = new Job();

    Configuration conf = job.getConfiguration();
    FileSystem fs;
    int reducers = 0;
    try {
        fs = FileSystem.get(conf);
        FileStatus[] p = fs.listStatus(new Path("blockIds/"));
        reducers = p.length;
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setNumReduceTasks(reducers);

        Path out = new Path("translations");
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
        FileOutputFormat.setOutputPath(job, out);
        FileInputFormat.addInputPath(job, new Path("temp"));

        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);

        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(ImmutableBytesWritable.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setJarByClass(Translate.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setPartitionerClass(IdPartitioner.class);

        job.setJobName("Translate");
        job.getConfiguration().set("mapred.compress.map.output", "true");
        job.getConfiguration().set("mapred.map.output.compression.codec",
                "org.apache.hadoop.io.compress.SnappyCodec");
        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);

    } catch (IOException e) {
        e.printStackTrace();
    }
    return job;
}

From source file:gr.ntua.h2rdf.loadTriples.TranslateAndImport.java

License:Apache License

public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {
    //compute sample partitions
    FileSystem fs;
    Configuration conf = new Configuration();
    int collected = 0, chunks = 0;
    try {
        fs = FileSystem.get(conf);
        Path sampleDir = new Path("sample");
        FileStatus[] samples = fs.listStatus(sampleDir);
        TreeSet<String> set = new TreeSet<String>();
        for (FileStatus sample : samples) {
            FSDataInputStream in = fs.open(sample.getPath());
            CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
            CompressionInputStream in1 = codec.createInputStream(in);
            NxParser nxp = new NxParser(in1);
            Iterator<Node[]> it = nxp.iterator();
            while (it.hasNext()) {
                Node[] tr = it.next();
                //System.out.println(tr[0].toN3());
                set.add(tr[0].toN3());
                set.add(tr[1].toN3());
                set.add(tr[2].toN3());
            }
            in1.close();
            in.close();
        }

        IndexTranslator translator = new IndexTranslator(TABLE_NAME + "_Index");
        HashMap<String, Long> index = translator.translate(set);
        set.clear();
        TreeSet<ImmutableBytesWritable> set1 = new TreeSet<ImmutableBytesWritable>(
                new ImmutableBytesWritable.Comparator());

        for (FileStatus sample : samples) {
            FSDataInputStream in = fs.open(sample.getPath());
            CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
            CompressionInputStream in1 = codec.createInputStream(in);
            NxParser nxp = new NxParser(in1);
            Iterator<Node[]> it = nxp.iterator();
            while (it.hasNext()) {
                Node[] tr = it.next();
                ByteTriple btr = new ByteTriple(index.get(tr[0].toN3()), index.get(tr[1].toN3()),
                        index.get(tr[2].toN3()));
                set1.add(new ImmutableBytesWritable(btr.getSPOByte()));
                set1.add(new ImmutableBytesWritable(btr.getSOPByte()));
                set1.add(new ImmutableBytesWritable(btr.getOPSByte()));
                set1.add(new ImmutableBytesWritable(btr.getOSPByte()));
                set1.add(new ImmutableBytesWritable(btr.getPOSByte()));
                set1.add(new ImmutableBytesWritable(btr.getPSOByte()));
            }
            in1.close();
            in.close();
        }
        index.clear();

        Path p = new Path("hexastorePartition");
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
        SequenceFile.Writer partitionWriter = SequenceFile.createWriter(fs, conf, p,
                ImmutableBytesWritable.class, NullWritable.class);

        double chunkSize = bucketSampledTriples * DistinctIds.samplingRate;
        System.out.println("chunkSize: " + chunkSize);
        Iterator<ImmutableBytesWritable> it = set1.iterator();
        while (it.hasNext()) {
            ImmutableBytesWritable key = it.next();
            if (collected > chunkSize) {
                partitionWriter.append(key, NullWritable.get());
                //System.out.println(Bytes.toStringBinary(key.get()));
                collected = 0;
                chunks++;
            } else {
                collected++;
            }
        }
        System.out.println("chunks: " + chunks);
        partitionWriter.close();

    } catch (IOException e) {
        e.printStackTrace();
    }

    Job job = new Job(conf, "Import Hexastore");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    Path out = new Path("out");
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(job, out);

    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("hexastorePartition"));
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HFileOutputFormat.class);

    StringBuilder compressionConfigValue = new StringBuilder();
    compressionConfigValue.append(URLEncoder.encode("I", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    compressionConfigValue.append('&');
    compressionConfigValue.append(URLEncoder.encode("S", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    compressionConfigValue.append('&');
    compressionConfigValue.append(URLEncoder.encode("T", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    job.getConfiguration().set("hbase.hfileoutputformat.families.compression",
            compressionConfigValue.toString());
    //job.getConfiguration().setInt("hbase.mapreduce.hfileoutputformat.blocksize",262144);
    //job.getConfiguration().setInt("hbase.mapreduce.hfileoutputformat.blocksize",16384);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setJarByClass(TranslateAndImport.class);
    job.setMapperClass(Map.class);
    //job.setReducerClass(HexaStoreHistogramsReduce.class);
    job.setReducerClass(HexaStoreReduce.class);

    job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
    job.getConfiguration().setInt("mapred.reduce.tasks", chunks + 1);
    //job.setCombinerClass(Combiner.class);
    job.setJobName("Translate Projections");
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().setInt("io.sort.mb", 100);
    job.getConfiguration().setInt("io.file.buffer.size", 131072);
    job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);

    job.getConfiguration().set("mapred.compress.map.output", "true");
    job.getConfiguration().set("mapred.map.output.compression.codec",
            "org.apache.hadoop.io.compress.SnappyCodec");
    //job.getConfiguration().setInt("hbase.hregion.max.filesize", 268435456);
    //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
    job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);

    return job;

}

From source file:gr.ntua.h2rdf.sampler.TotalOrderPrep.java

License:Open Source License

public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {

    Job sample_job = new Job();

    // Remember the real input format so the sampling input format can use
    // it under the hood

    sample_job.getConfiguration().setBoolean(ARG_INPUTFORMAT, true);
    sample_job.setInputFormatClass(TextInputFormat.class);

    //sample_job.getConfiguration().set("mapred.fairscheduler.pool", "pool9");
    // Base the sample size on the number of reduce tasks that will be used
    // by the real job, but only use 1 reducer for this job (maps output very
    // little)
    sample_job.setNumReduceTasks(1);

    // Make this job's output a temporary file -- the input file for the
    // real job's TotalOrderPartitioner
    Path partition = new Path("partitions/");
    //partition.getFileSystem(job.getConfiguration()).deleteOnExit(partition);

    conf = new Configuration();
    FileSystem fs;
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(partition)) {
            fs.delete(partition, true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(sample_job, partition);
    FileInputFormat.setInputPaths(sample_job, new Path(args[0]));
    //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path(partition, "part-r-00000"));
    //job.setPartitionerClass(TotalOrderPartitioner.class);

    // If there's a combiner, turn it into an identity reducer to prevent
    // destruction of keys.

    sample_job.setCombinerClass(Combiner.class);

    sample_job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    sample_job.setMapOutputValueClass(ImmutableBytesWritable.class);
    sample_job.setOutputKeyClass(ImmutableBytesWritable.class);
    sample_job.setOutputValueClass(NullWritable.class);
    sample_job.setPartitionerClass(HashPartitioner.class);
    sample_job.setOutputFormatClass(SequenceFileOutputFormat.class);
    sample_job.setJarByClass(TotalOrderPrep.class);
    sample_job.setMapperClass(Map.class);
    sample_job.setReducerClass(PartitioningReducer.class);
    sample_job.setJobName("(Sampler)");
    sample_job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    sample_job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    return sample_job;

}

From source file:hadoop.SleepJob.java

License:Apache License

public Job createJob(int numMapper, int numReducer, long mapSleepTime, int mapSleepCount, long reduceSleepTime,
        int reduceSleepCount) throws IOException {
    Configuration conf = getConf();
    conf.setLong(MAP_SLEEP_TIME, mapSleepTime);
    conf.setLong(REDUCE_SLEEP_TIME, reduceSleepTime);
    conf.setInt(MAP_SLEEP_COUNT, mapSleepCount);
    conf.setInt(REDUCE_SLEEP_COUNT, reduceSleepCount);
    conf.setInt(MRJobConfig.NUM_MAPS, numMapper);
    Job job = new Job(conf, "sleep");
    job.setNumReduceTasks(numReducer);
    job.setJarByClass(SleepJob.class);
    job.setMapperClass(SleepMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(SleepReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setInputFormatClass(SleepInputFormat.class);
    job.setPartitionerClass(SleepJobPartitioner.class);
    job.setSpeculativeExecution(false);
    job.setJobName("Sleep job");
    FileInputFormat.addInputPath(job, new Path("ignored"));
    return job;
}

From source file:hadoop.wordcount.partitioner.combiner.WordCountAddPartitioner.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setNumReduceTasks(5);
    job.setJarByClass(WordCountAddPartitioner.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setPartitionerClass(MyPartitioner.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:hadoop.wordcount.partitioner.nocombiner.WordCountAddPartitioner.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setNumReduceTasks(5);
    job.setJarByClass(WordCountAddPartitioner.class);
    job.setMapperClass(TokenizerMapper.class);
    //      job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setPartitionerClass(MyPartitioner.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:hadoop.wordcount.partitioner.permap.WordCountAddPartitioner.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setNumReduceTasks(5);
    job.setJarByClass(WordCountAddPartitioner.class);
    job.setMapperClass(TokenizerMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setPartitionerClass(MyPartitioner.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:hamr.core.general.job.GeneralJob.java

License:Open Source License

public static void generalization(Class<? extends AnnotedBean> abClass, Job job) {
    job.setMapperClass(GeneralMapper.class);
    job.setPartitionerClass(GeneralPartitioner.class);
    job.setMapOutputKeyClass(abClass);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(GeneralReducer.class);
    job.setGroupingComparatorClass(GeneralGroupComparator.class);
}

From source file:hk.newsRecommender.TFIDF.java

License:Open Source License

public static void main(String[] args) throws Exception {
    // part0----------------------------------------------------
    Configuration conf0 = new Configuration();
    // DF()
    // FileSystem hdfs = FileSystem.get(conf1);
    // FileStatus p[] = hdfs.listStatus(new Path(args[0]));
    String hdfsUrl0 = conf0.get("fs.defaultFS");

    // optionally set NumReduceTasks to the number of input files
    Job job0 = Job.getInstance(conf0, "My_tdif_part0");
    job0.setJarByClass(TFIDF.class);
    job0.setMapperClass(Mapper_Part0.class);
    // job1.setCombinerClass(Combiner_Part1.class); // optional combiner
    job0.setReducerClass(Reduce_Part0.class);
    job0.setMapOutputKeyClass(Text.class);
    job0.setMapOutputValueClass(Text.class);
    job0.setOutputKeyClass(Text.class);
    job0.setOutputValueClass(Text.class);
    // job1.setNumReduceTasks(p.length);

    FileInputFormat.addInputPath(job0, new Path(hdfsUrl0 + "/data/recommend/data2.txt"));
    FileOutputFormat.setOutputPath(job0, new Path(hdfsUrl0 + "/data/recommend/tfidf0"));

    job0.waitForCompletion(true);

    // part1----------------------------------------------------
    Configuration conf1 = new Configuration();
    // DF()
    // FileSystem hdfs = FileSystem.get(conf1);
    // FileStatus p[] = hdfs.listStatus(new Path(args[0]));
    String hdfsUrl = conf1.get("fs.defaultFS");

    // optionally set NumReduceTasks to the number of input files
    Job job1 = Job.getInstance(conf1, "My_tdif_part1");
    job1.setJarByClass(TFIDF.class);
    job1.setMapperClass(Mapper_Part1.class);
    // job1.setCombinerClass(Combiner_Part1.class); // optional combiner
    job1.setReducerClass(Reduce_Part1.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    // job1.setNumReduceTasks(p.length);
    job1.setPartitionerClass(MyPartitoner.class); // use the custom MyPartitoner

    FileInputFormat.addInputPath(job1, new Path(hdfsUrl + "/data/recommend/tfidf0"));
    FileOutputFormat.setOutputPath(job1, new Path(hdfsUrl + "/data/recommend/tfidf1"));

    job1.waitForCompletion(true);
    // part2----------------------------------------
    Configuration conf2 = new Configuration();

    Job job2 = Job.getInstance(conf2, "My_tdif_part2");
    job2.setJarByClass(TFIDF.class);
    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(Text.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
    job2.setMapperClass(Mapper_Part2.class);
    job2.setReducerClass(Reduce_Part2.class);
    // job2.setNumReduceTasks(p.length);

    FileInputFormat.setInputPaths(job2, new Path(hdfsUrl + "/data/recommend/tfidf1"));
    FileOutputFormat.setOutputPath(job2, new Path(hdfsUrl + "/data/recommend/tfidf2"));

    job2.waitForCompletion(true);

    //      part3----------------------------------------
    //      Configuration conf3 = new Configuration();
    //      
    //      Job job3 = Job.getInstance(conf3, "My_tdif_part3");
    //      job3.setJarByClass(TFIDF.class);
    //      job3.setMapOutputKeyClass(Text.class);
    //      job3.setMapOutputValueClass(Text.class);
    //      job3.setOutputKeyClass(Text.class);
    //      job3.setOutputValueClass(Text.class);
    //      job3.setMapperClass(Mapper_Part3.class);
    //      job3.setReducerClass(Reduce_Part3.class);
    //      // job2.setNumReduceTasks(p.length);
    //      
    //      FileInputFormat.setInputPaths(job3, new Path(hdfsUrl + "/data/recommend/tfidf2"));
    //      FileOutputFormat.setOutputPath(job3, new Path(hdfsUrl + "/data/recommend/tfidf3"));
    //      
    //      job3.waitForCompletion(true);
    //      hdfs.delete(new Path(args[1]), true);
}