Example usage for org.apache.hadoop.conf Configuration set

List of usage examples for org.apache.hadoop.conf Configuration set

Introduction

In this page you can find the example usage for org.apache.hadoop.conf Configuration set.

Prototype

public void set(String name, String value) 

Source Link

Document

Set the value of the name property.

Usage

From source file:MStress_Client.java

License:Open Source License

public static void main(String args[]) {
    parseOptions(args);//from  w  w  w. j  av  a2s  .  c  om
    int result = 0;

    try {
        Configuration conf = new Configuration(true);
        String confSet = "hdfs://" + dfsServer_ + ":" + dfsPort_;
        conf.set("fs.default.name", confSet);
        conf.set("fs.trash.interval", "0");
        InetSocketAddress inet = new InetSocketAddress(dfsServer_, dfsPort_);
        dfsClient_ = new DFSClient(inet, conf);

        if (parsePlanFile() < 0) {
            System.exit(-1);
        }

        if (testName_.equals("create")) {
            result = createDFSPaths();
        } else if (testName_.equals("stat")) {
            result = statDFSPaths();
        } else if (testName_.equals("readdir")) {
            result = listDFSPaths();
        } else if (testName_.equals("delete")) {
            result = removeDFSPaths();
        } else {
            System.out.printf("Error: unrecognized test \'%s\'\n", testName_);
            System.exit(-1);
        }
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(-1);
    }

    if (result != 0) {
        System.exit(-1);
    }

    return;
}

From source file:PopulateBseData.java

License:Apache License

public PopulateBseData() throws IOException {
    Configuration conf = new Configuration();
    conf.set("hbase.zookeeper.quorum", "localhost");
    conf.set("hbase.zookeeper.property.clientPort", "2181");
    hTable = new HTable(conf, "stockData1");
}

From source file:ImportTsv.java

License:Apache License

/**
 * Sets up the actual job./*  ww  w  .  j a v a2s. c o m*/
 *
 * @param conf  The current configuration.
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException, ClassNotFoundException {
    Job job = null;
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
        try (Admin admin = connection.getAdmin()) {
            // Support non-XML supported characters
            // by re-encoding the passed separator as a Base64 string.
            String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
            if (actualSeparator != null) {
                conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
            }

            // See if a non-default Mapper was set
            String mapperClassName = conf.get(MAPPER_CONF_KEY);
            Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

            TableName tableName = TableName.valueOf(args[0]);
            Path inputDir = new Path(args[1]);

            // set filter
            conf.set(EASTCOM_FILTER_PARAMS, args[3]);
            conf.set(EASTCOM_FILTER_DEFINE, args[4]);

            String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
            job = Job.getInstance(conf, jobName);
            job.setJarByClass(mapperClass);
            FileInputFormat.setInputPaths(job, inputDir);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(mapperClass);
            String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
            String columns[] = conf.getStrings(COLUMNS_CONF_KEY);
            if (StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
                String fileLoc = conf.get(CREDENTIALS_LOCATION);
                Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
                job.getCredentials().addAll(cred);
            }

            if (hfileOutPath != null) {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    if ("yes".equalsIgnoreCase(conf.get(CREATE_TABLE_CONF_KEY, "yes"))) {
                        LOG.warn(errorMsg);
                        // TODO: this is backwards. Instead of depending on the existence of a table,
                        // create a sane splits file for HFileOutputFormat based on data sampling.
                        createTable(admin, tableName, columns);
                    } else {
                        LOG.error(errorMsg);
                        throw new TableNotFoundException(errorMsg);
                    }
                }
                try (HTable table = (HTable) connection.getTable(tableName)) {
                    boolean noStrict = conf.getBoolean(NO_STRICT_COL_FAMILY, false);
                    // if no.strict is false then check column family
                    if (!noStrict) {
                        ArrayList<String> unmatchedFamilies = new ArrayList<String>();
                        Set<String> cfSet = getColumnFamilies(columns);
                        HTableDescriptor tDesc = table.getTableDescriptor();
                        for (String cf : cfSet) {
                            if (tDesc.getFamily(Bytes.toBytes(cf)) == null) {
                                unmatchedFamilies.add(cf);
                            }
                        }
                        if (unmatchedFamilies.size() > 0) {
                            ArrayList<String> familyNames = new ArrayList<String>();
                            for (HColumnDescriptor family : table.getTableDescriptor().getFamilies()) {
                                familyNames.add(family.getNameAsString());
                            }
                            String msg = "Column Families " + unmatchedFamilies + " specified in "
                                    + COLUMNS_CONF_KEY + " does not match with any of the table " + tableName
                                    + " column families " + familyNames + ".\n"
                                    + "To disable column family check, use -D" + NO_STRICT_COL_FAMILY
                                    + "=true.\n";
                            usage(msg);
                            System.exit(-1);
                        }
                    }
                    job.setReducerClass(PutSortReducer.class);
                    Path outputDir = new Path(hfileOutPath);
                    FileOutputFormat.setOutputPath(job, outputDir);
                    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
                    if (mapperClass.equals(TsvImporterTextMapper.class)) {
                        job.setMapOutputValueClass(Text.class);
                        job.setReducerClass(TextSortReducer.class);
                    } else {
                        job.setMapOutputValueClass(Put.class);
                        job.setCombinerClass(PutCombiner.class);
                    }
                    HFileOutputFormat2.configureIncrementalLoad(job, table, table);
                }
            } else {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    LOG.error(errorMsg);
                    throw new TableNotFoundException(errorMsg);
                }
                if (mapperClass.equals(TsvImporterTextMapper.class)) {
                    usage(TsvImporterTextMapper.class.toString()
                            + " should not be used for non bulkloading case. use "
                            + TsvImporterMapper.class.toString()
                            + " or custom mapper whose value type is Put.");
                    System.exit(-1);
                }
                // No reducers. Just write straight to table. Call initTableReducerJob
                // to set up the TableOutputFormat.
                TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
                job.setNumReduceTasks(0);
            }

            TableMapReduceUtil.addDependencyJars(job);
            TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                    com.google.common.base.Function.class /* Guava used by TsvParser */);
        }
    }
    return job;
}

From source file:HBaseBloomFilterSemiJoinSystemTest.java

License:Apache License

public static void setUp() throws Exception {
    Configuration conf = HBaseConfiguration.create();
    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, ConstantSizeRegionSplitPolicy.class.getName());
    util = new HBaseTestingUtility(conf);

}

From source file:RunPageRankSchimmy.java

License:Apache License

private float phase1(String path, int i, int j, int n, boolean useCombiner, boolean useInmapCombiner,
        boolean useRange) throws Exception {
    Configuration conf = getConf();

    String in = path + "/iter" + FORMAT.format(i);
    String out = path + "/iter" + FORMAT.format(j) + "t";
    String outm = out + "-mass";

    FileSystem fs = FileSystem.get(conf);

    // We need to actually count the number of part files to get the number
    // of partitions (because the directory might contain _log).
    int numPartitions = 0;
    for (FileStatus s : FileSystem.get(conf).listStatus(new Path(in))) {
        if (s.getPath().getName().contains("part-")) {
            numPartitions++;//from  ww w  . ja  v  a 2s.  c o  m
        }
    }

    conf.setInt("NodeCount", n);

    Partitioner<IntWritable, Writable> p = null;

    if (useRange) {
        p = new RangePartitioner();
        ((Configurable) p).setConf(conf);
    } else {
        p = new HashPartitioner<IntWritable, Writable>();
    }

    // This is really annoying: the mapping between the partition numbers on
    // disk (i.e., part-XXXX) and what partition the file contains (i.e.,
    // key.hash % #reducer) is arbitrary... so this means that we need to
    // open up each partition, peek inside to find out.
    IntWritable key = new IntWritable();
    PageRankNode value = new PageRankNode();
    FileStatus[] status = fs.listStatus(new Path(in));

    StringBuilder sb = new StringBuilder();

    for (FileStatus f : status) {
        if (!f.getPath().getName().contains("part-")) {
            continue;
        }

        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(f.getPath()));

        reader.next(key, value);
        int np = p.getPartition(key, value, numPartitions);
        reader.close();

        LOG.info(f.getPath() + "\t" + np);
        sb.append(np + "=" + f.getPath() + ";");
    }

    LOG.info(sb.toString().trim());

    LOG.info("PageRankSchimmy: iteration " + j + ": Phase1");
    LOG.info(" - input: " + in);
    LOG.info(" - output: " + out);
    LOG.info(" - nodeCnt: " + n);
    LOG.info(" - useCombiner: " + useCombiner);
    LOG.info(" - useInmapCombiner: " + useInmapCombiner);
    LOG.info(" - numPartitions: " + numPartitions);
    LOG.info(" - useRange: " + useRange);
    LOG.info("computed number of partitions: " + numPartitions);

    int numReduceTasks = numPartitions;

    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    //conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.set("PageRankMassPath", outm);
    conf.set("BasePath", in);
    conf.set("PartitionMapping", sb.toString().trim());

    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    Job job = Job.getInstance(conf);
    job.setJobName("PageRankSchimmy:iteration" + j + ":Phase1");
    job.setJarByClass(RunPageRankSchimmy.class);

    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.setInputPaths(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FloatWritable.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    if (useInmapCombiner) {
        job.setMapperClass(MapWithInMapperCombiningClass.class);
    } else {
        job.setMapperClass(MapClass.class);
    }

    if (useCombiner) {
        job.setCombinerClass(CombineClass.class);
    }

    if (useRange) {
        job.setPartitionerClass(RangePartitioner.class);
    }

    job.setReducerClass(ReduceClass.class);

    FileSystem.get(conf).delete(new Path(out), true);
    FileSystem.get(conf).delete(new Path(outm), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    float mass = Float.NEGATIVE_INFINITY;
    for (FileStatus f : fs.listStatus(new Path(outm))) {
        FSDataInputStream fin = fs.open(f.getPath());
        mass = sumLogProbs(mass, fin.readFloat());
        fin.close();
    }

    return mass;
}

From source file:TaskSearchWords.java

public static void main(String[] args) throws Exception {

    String hadoopServer = "ip-172-31-13-245.ap-southeast-1.compute.internal";

    Configuration conf = new Configuration();

    // this should be like defined in your mapred-site.xml
    conf.set("mapred.job.tracker", hadoopServer + ":54311");

    // like defined in hdfs-site.xml
    conf.set("fs.default.name", "hdfs://" + hadoopServer + ":9000");

    //setting mapred classes for HDFS to know which classes to process
    conf.set("mapreduce.map.class", "TokenizerMapper");
    conf.set("mapreduce.reduce.class", "IntSumReducer");

    //to prevent classdefnotfound exception
    conf.set("mapred.jar", "C:\\GitRepos\\OCR\\HadoopTasks\\dist\\HadoopTasks.jar");

    //to pass parameters to mapred classes
    conf.set("RAWOCRCLOB",
            "Omeprazole_Cap E/C 10mg\n" + "Dressit Ster esDress\n" + "Flaminal Forte 15g\n"
                    + "Co-Magaldrox_Susp 195mg/220mg/5ml S/F\n" + "Antacid/Oxetacaine_Oral Susp S/F\n"
                    + "Simeticone_Susp 40mg/ml S/F\n" + "Infacol_Susp 40mg/ml S/F");

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(TaskSearchWords.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path("/user/ubuntu/MedicinesProcessed.csv"));
    FileSystem fs = FileSystem.get(conf);
    Path out = new Path("/user/ubuntu/processed/");
    fs.delete(out, true);/*  w  w w  .  jav  a2s. c o m*/

    //finally set the empty out path
    FileOutputFormat.setOutputPath(job, out);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:CountJob.java

License:Apache License

public static void doJob(String param, String args[], String msgs)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    conf.set(TokenizerMapper.PATTERN, args[2]);
    FileSystem hdfs = FileSystem.get(conf);
    Path tempOutput1 = new Path("/data/output/temp/" + param + "1");
    Path tempOutput2 = new Path("/data/output/temp/" + param + "2");
    if (hdfs.exists(tempOutput1) || hdfs.exists(tempOutput2)) {
        hdfs.delete(tempOutput1, true);/*from w ww  .  j av  a2s  .  c om*/
        hdfs.delete(tempOutput2, true);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(CountJob.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, tempOutput1);
    job.waitForCompletion(true);

    Job sortJob1 = new Job(conf);
    sortJob1.setJobName("grep-sort");
    FileInputFormat.setInputPaths(sortJob1, tempOutput1);
    sortJob1.setInputFormatClass(SequenceFileInputFormat.class);
    sortJob1.setMapperClass(InverseMapper.class);
    sortJob1.setNumReduceTasks(1); // write a single file
    FileOutputFormat.setOutputPath(sortJob1, tempOutput2);
    sortJob1.setSortComparatorClass( // sort by decreasing freq
            LongWritable.DecreasingComparator.class);
    sortJob1.waitForCompletion(true);
    hdfs.delete(tempOutput1, true);

}

From source file:BuildPersonalizedPageRankRecords.java

License:Apache License

/**
 * Runs this tool./*ww w.  j av  a 2  s  . c o  m*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    //parsing more than 1 integer later;
    options.addOption(
            OptionBuilder.withArgName("src").hasArg().withDescription("source of pagerank").create(SOURCES));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)
            || !cmdline.hasOption(SOURCES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    //Change to array later
    String src = cmdline.getOptionValue(SOURCES);

    LOG.info("Tool name: " + BuildPersonalizedPageRankRecords.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    LOG.info(" - numNodes: " + n);

    Configuration conf = getConf();
    conf.setInt(NODE_CNT_FIELD, n);
    //more to be set later;
    conf.set(NODE_SRC, src);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildPersonalizedPageRankRecords.class.getSimpleName() + ":" + inputPath);
    job.setJarByClass(BuildPersonalizedPageRankRecords.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNodeMultiSrc.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNodeMultiSrc.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}

From source file:RunPersonalizedPageRankBasic.java

License:Apache License

/**
 * Runs this tool./*ww  w.j a  va 2s.co m*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(COMBINER, "use combiner"));
    options.addOption(new Option(INMAPPER_COMBINER, "user in-mapper combiner"));
    options.addOption(new Option(RANGE, "use range partitioner"));

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("base path").create(BASE));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("start iteration").create(START));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("end iteration").create(END));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(OptionBuilder.withArgName("sources").hasArg().withDescription("personalized source nodes")
            .create(SRC_NODES));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(BASE) || !cmdline.hasOption(START) || !cmdline.hasOption(END)
            || !cmdline.hasOption(NUM_NODES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String basePath = cmdline.getOptionValue(BASE);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    int s = Integer.parseInt(cmdline.getOptionValue(START));
    int e = Integer.parseInt(cmdline.getOptionValue(END));
    boolean useCombiner = cmdline.hasOption(COMBINER);
    boolean useInmapCombiner = cmdline.hasOption(INMAPPER_COMBINER);
    boolean useRange = cmdline.hasOption(RANGE);
    String src = cmdline.getOptionValue(SRC_NODES);

    LOG.info("Tool name: RunPageRank");
    LOG.info(" - base path: " + basePath);
    LOG.info(" - num nodes: " + n);
    LOG.info(" - start iteration: " + s);
    LOG.info(" - end iteration: " + e);
    LOG.info(" - use combiner: " + useCombiner);
    LOG.info(" - use in-mapper combiner: " + useInmapCombiner);
    LOG.info(" - user range partitioner: " + useRange);

    Configuration conf = getConf();
    conf.set(NODE_SRC, src);

    // Iterate PageRank.
    for (int i = s; i < e; i++) {
        iteratePageRank(i, i + 1, basePath, n, useCombiner, useInmapCombiner);
    }

    return 0;
}

From source file:RecordExtracting.java

License:Apache License

/**
 * Runs this tool./*  w w  w. j a v a  2s.c  o  m*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    /* options.addOption(OptionBuilder.withArgName("path").hasArg()
         .withDescription("input path").create(INPUT));
     options.addOption(OptionBuilder.withArgName("path").hasArg()
         .withDescription("output path").create(OUTPUT));
     options.addOption(OptionBuilder.withArgName("num").hasArg()
         .withDescription("number of nodes").create(NUM_NODES));
     //parsing more than 1 integer later;*/
    options.addOption(
            OptionBuilder.withArgName("src").hasArg().withDescription("spamming users").create(SOURCES));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    /* if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)||!cmdline.hasOption(SOURCES)) {
       System.out.println("args: " + Arrays.toString(args));
       HelpFormatter formatter = new HelpFormatter();
       formatter.setWidth(120);
       formatter.printHelp(this.getClass().getName(), options);
       ToolRunner.printGenericCommandUsage(System.out);
       return -1;
     }*/

    String inputPath = "xzzqskfinal/reviewsNew.txt";//cmdline.getOptionValue(INPUT);
    String outputPath = "xzzqskfinal/SpammingRecord";//cmdline.getOptionValue(OUTPUT);
    //int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    //Change to array later
    String src = cmdline.getOptionValue(SOURCES);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool name: " + RecordExtracting.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    //LOG.info(" - numNodes: " + n);

    Configuration conf = getConf();

    conf.set("mapreduce.map.memory.mb", "2048");
    conf.set("mapreduce.map.java.opts", "-Xmx2048m");
    conf.set("mapreduce.reduce.memory.mb", "2048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");
    //conf.setInt(NODE_CNT_FIELD, n);
    //more to be set later;
    conf.set(NODE_SRC, src);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    Job job = Job.getInstance(conf);
    job.setJobName(RatingSpamming.class.getSimpleName());
    job.setJarByClass(RecordExtracting.class);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    //job.setOutputFormatClass(TextOutputFormat.class);
    //job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);
    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}