List of usage examples for org.apache.hadoop.mapred.JobConf#setMapOutputValueClass. This method declares the class of the values emitted by map tasks; it is needed only when the intermediate value class differs from the job's final output value class, since by default Hadoop assumes the two are the same.

public void setMapOutputValueClass(Class<?> theClass)
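Before the project-specific examples, here is a minimal, self-contained sketch of the typical pattern (not drawn from any of the projects below; the TokenLengthExample class, its mapper/reducer, and the input/output arguments are illustrative assumptions). The mapper emits IntWritable values while the reducer writes Text values, so the intermediate value class must be declared explicitly with setMapOutputValueClass.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class TokenLengthExample {

    // Mapper emits (token, token length): the map output value type is IntWritable.
    public static class TokenMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> out,
                Reporter reporter) throws IOException {
            for (String token : value.toString().split("\\s+")) {
                out.collect(new Text(token), new IntWritable(token.length()));
            }
        }
    }

    // Reducer writes (token, "len=N"): the final value type is Text, different from the map output value type.
    public static class LengthReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, Text> {
        public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, Text> out,
                Reporter reporter) throws IOException {
            if (values.hasNext()) {
                out.collect(key, new Text("len=" + values.next().get()));
            }
        }
    }

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(TokenLengthExample.class);
        conf.setJobName("setMapOutputValueClass-example");
        conf.setMapperClass(TokenMapper.class);
        conf.setReducerClass(LengthReducer.class);
        // Final (reduce-side) output types:
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);
        // The intermediate value type differs from the final one, so declare it explicitly.
        // (The map output key type is Text in both stages, so no setMapOutputKeyClass call is needed.)
        conf.setMapOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));   // hypothetical input path argument
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));  // hypothetical output path argument
        JobClient.runJob(conf);
    }
}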
From source file:org.smartfrog.services.hadoop.benchmark.citerank.UpdateRanks.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        return usage("<input path> <output path> <number of pages> <dangling pages contribution>");
    }
    JobConf conf = createInputOutputConfiguration(args);
    conf.set(CiteRankTool.RANK_COUNT, args[2]);
    conf.set(CiteRankTool.RANK_DANGLING, args[3]);
    conf.setMapperClass(UpdateRanksMapper.class);
    conf.setReducerClass(UpdateRanksReducer.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setNumMapTasks(CiteRankTool.NUM_MAP_TASKS);
    conf.setNumReduceTasks(CiteRank.NUM_REDUCE_TASKS);
    return runJob(conf);
}
From source file:org.terrier.applications.HadoopIndexing.java
License:Mozilla Public License
/** Starts the MapReduce indexing.
 * @param args
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
    long time = System.currentTimeMillis();
    boolean docPartitioned = false;
    int numberOfReducers = Integer
            .parseInt(ApplicationSetup.getProperty("terrier.hadoop.indexing.reducers", "26"));
    final HadoopPlugin.JobFactory jf = HadoopPlugin.getJobFactory("HOD-TerrierIndexing");
    if (args.length == 2 && args[0].equals("-p")) {
        logger.info("Document-partitioned Mode, " + numberOfReducers + " output indices.");
        numberOfReducers = Integer.parseInt(args[1]);
        docPartitioned = true;
    } else if (args.length == 1 && args[0].equals("--merge")) {
        if (numberOfReducers > 1)
            mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers);
        else
            logger.error("No point merging 1 reduce task output");
        return;
    } else if (args.length == 0) {
        logger.info("Term-partitioned Mode, " + numberOfReducers + " reducers creating one inverted index.");
        docPartitioned = false;
        if (numberOfReducers > MAX_REDUCE) {
            logger.warn("Excessive reduce tasks (" + numberOfReducers + ") in use "
                    + "- SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm can use " + MAX_REDUCE + " at most");
        }
    } else {
        logger.fatal(usage());
        return;
    }
    if (!(CompressionFactory.getCompressionConfiguration("inverted", new String[0],
            false) instanceof BitCompressionConfiguration)) {
        logger.error("Sorry, only default BitCompressionConfiguration is supported by HadoopIndexing"
                + " - you can recompress the inverted index later using IndexRecompressor");
        return;
    }
    if (jf == null)
        throw new Exception("Could not get JobFactory from HadoopPlugin");
    final JobConf conf = jf.newJob();
    conf.setJobName("terrierIndexing");
    if (Files.exists(ApplicationSetup.TERRIER_INDEX_PATH)
            && Index.existsIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX)) {
        logger.fatal("Cannot index while index exists at " + ApplicationSetup.TERRIER_INDEX_PATH + ","
                + ApplicationSetup.TERRIER_INDEX_PREFIX);
        return;
    }
    boolean blockIndexing = ApplicationSetup.BLOCK_INDEXING;
    if (blockIndexing) {
        conf.setMapperClass(Hadoop_BlockSinglePassIndexer.class);
        conf.setReducerClass(Hadoop_BlockSinglePassIndexer.class);
    } else {
        conf.setMapperClass(Hadoop_BasicSinglePassIndexer.class);
        conf.setReducerClass(Hadoop_BasicSinglePassIndexer.class);
    }
    FileOutputFormat.setOutputPath(conf, new Path(ApplicationSetup.TERRIER_INDEX_PATH));
    conf.set("indexing.hadoop.prefix", ApplicationSetup.TERRIER_INDEX_PREFIX);
    conf.setMapOutputKeyClass(SplitEmittedTerm.class);
    conf.setMapOutputValueClass(MapEmittedPostingList.class);
    conf.setBoolean("indexing.hadoop.multiple.indices", docPartitioned);
    if (!conf.get("mapred.job.tracker").equals("local")) {
        conf.setMapOutputCompressorClass(GzipCodec.class);
        conf.setCompressMapOutput(true);
    } else {
        conf.setCompressMapOutput(false);
    }
    conf.setInputFormat(MultiFileCollectionInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setOutputKeyComparatorClass(SplitEmittedTerm.SETRawComparatorTermSplitFlush.class);
    conf.setOutputValueGroupingComparator(SplitEmittedTerm.SETRawComparatorTerm.class);
    conf.setReduceSpeculativeExecution(false);
    // parse the collection.spec
    BufferedReader specBR = Files.openFileReader(ApplicationSetup.COLLECTION_SPEC);
    String line = null;
    List<Path> paths = new ArrayList<Path>();
    while ((line = specBR.readLine()) != null) {
        if (line.startsWith("#"))
            continue;
        paths.add(new Path(line));
    }
    specBR.close();
    FileInputFormat.setInputPaths(conf, paths.toArray(new Path[paths.size()]));
    conf.setNumReduceTasks(numberOfReducers);
    if (numberOfReducers > 1) {
        if (docPartitioned)
            conf.setPartitionerClass(SplitEmittedTerm.SETPartitioner.class);
        else
            conf.setPartitionerClass(SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm.class);
    } else {
        // for JUnit tests, we seem to need to restore the original partitioner class
        conf.setPartitionerClass(HashPartitioner.class);
    }
    JobID jobId = null;
    boolean ranOK = true;
    try {
        RunningJob rj = JobClient.runJob(conf);
        jobId = rj.getID();
        HadoopUtility.finishTerrierJob(conf);
    } catch (Exception e) {
        logger.error("Problem running job", e);
        ranOK = false;
    }
    if (jobId != null) {
        deleteTaskFiles(ApplicationSetup.TERRIER_INDEX_PATH, jobId);
    }
    if (ranOK) {
        if (!docPartitioned) {
            if (numberOfReducers > 1)
                mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers);
        }
        Hadoop_BasicSinglePassIndexer.finish(ApplicationSetup.TERRIER_INDEX_PATH,
                docPartitioned ? numberOfReducers : 1, jf);
    }
    System.out.println("Time Taken = " + ((System.currentTimeMillis() - time) / 1000) + " seconds");
    jf.close();
}
From source file:org.terrier.structures.indexing.CompressingMetaIndexBuilder.java
License:Mozilla Public License
/**
 * reverseAsMapReduceJob
 * @param index
 * @param structureName
 * @param keys
 * @param jf
 * @throws Exception
 */
//@SuppressWarnings("deprecation")
public static void reverseAsMapReduceJob(IndexOnDisk index, String structureName, String[] keys,
        HadoopPlugin.JobFactory jf) throws Exception {
    long time = System.currentTimeMillis();
    final JobConf conf = jf.newJob();
    conf.setJobName("Reverse MetaIndex");
    conf.setMapOutputKeyClass(KeyValueTuple.class);
    conf.setMapOutputValueClass(IntWritable.class);
    conf.setMapperClass(MapperReducer.class);
    conf.setReducerClass(MapperReducer.class);
    conf.setNumReduceTasks(keys.length);
    conf.setPartitionerClass(KeyedPartitioner.class);
    conf.setInputFormat(CompressingMetaIndexInputFormat.class);
    conf.setReduceSpeculativeExecution(false);
    conf.set("MetaIndexInputStreamRecordReader.structureName", structureName);
    conf.setInt("CompressingMetaIndexBuilder.reverse.keyCount", keys.length);
    conf.set("CompressingMetaIndexBuilder.reverse.keys", ArrayUtils.join(keys, ","));
    conf.set("CompressingMetaIndexBuilder.forward.valueLengths",
            index.getIndexProperty("index." + structureName + ".value-lengths", ""));
    conf.set("CompressingMetaIndexBuilder.forward.keys",
            index.getIndexProperty("index." + structureName + ".key-names", ""));
    FileOutputFormat.setOutputPath(conf, new Path(index.getPath()));
    HadoopUtility.toHConfiguration(index, conf);
    conf.setOutputFormat(NullOutputFormat.class);
    try {
        RunningJob rj = JobClient.runJob(conf);
        rj.getID();
        HadoopUtility.finishTerrierJob(conf);
    } catch (Exception e) {
        throw new Exception("Problem running job to reverse metadata", e);
    }
    // only update the index from the controlling process, so that we dont have locking/concurrency issues
    index.setIndexProperty("index." + structureName + ".reverse-key-names", ArrayUtils.join(keys, ","));
    index.flush();
    logger.info("Time Taken = " + ((System.currentTimeMillis() - time) / 1000) + " seconds");
}
From source file:org.warcbase.index.IndexerRunner.java
License:Apache License
@SuppressWarnings("static-access")
public int run(String[] args) throws IOException, ParseException {
    LOG.info("Initializing indexer...");
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("file").hasArg().withDescription("input file list").create(INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("HDFS index output path")
            .create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of shards")
            .create(SHARDS_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("config file (optional)")
            .create(CONFIG_OPTION));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }
    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_OPTION)
            || !cmdline.hasOption(SHARDS_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    String configPath = null;
    if (cmdline.hasOption(CONFIG_OPTION)) {
        configPath = cmdline.getOptionValue(CONFIG_OPTION);
    }
    String inputPath = cmdline.getOptionValue(INPUT_OPTION);
    String outputPath = cmdline.getOptionValue(INDEX_OPTION);
    int shards = Integer.parseInt(cmdline.getOptionValue(SHARDS_OPTION));
    JobConf conf = new JobConf(getConf(), IndexerRunner.class);
    if (configPath == null) {
        LOG.info("Config not specified, using default src/main/solr/WARCIndexer.conf");
        configPath = "src/main/solr/WARCIndexer.conf";
    }
    File configFile = new File(configPath);
    if (!configFile.exists()) {
        LOG.error("Error: config does not exist!");
        System.exit(-1);
    }
    Config config = ConfigFactory.parseFile(configFile);
    conf.set(CONFIG_PROPERTIES, config.withOnlyPath("warc").root().render(ConfigRenderOptions.concise()));
    FileSystem fs = FileSystem.get(conf);
    LOG.info("HDFS index output path: " + outputPath);
    conf.set(IndexerReducer.HDFS_OUTPUT_PATH, outputPath);
    if (fs.exists(new Path(outputPath))) {
        LOG.error("Error: path exists already!");
        System.exit(-1);
    }
    LOG.info("Number of shards: " + shards);
    conf.setInt(IndexerMapper.NUM_SHARDS, shards);
    // Add input paths:
    LOG.info("Reading input files...");
    String line = null;
    BufferedReader br = new BufferedReader(new FileReader(inputPath));
    while ((line = br.readLine()) != null) {
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();
    LOG.info("Read " + FileInputFormat.getInputPaths(conf).length + " input files.");
    conf.setJobName(IndexerRunner.class.getSimpleName() + ": " + inputPath);
    conf.setInputFormat(ArchiveFileInputFormat.class);
    conf.setMapperClass(IndexerMapper.class);
    conf.setReducerClass(IndexerReducer.class);
    conf.setOutputFormat(NullOutputFormat.class);
    // Ensure the JARs we provide take precedence over ones from Hadoop:
    conf.setBoolean("mapreduce.job.user.classpath.first", true);
    // Also set reduce speculative execution off, avoiding duplicate submissions to Solr.
    conf.setBoolean("mapreduce.reduce.speculative", false);
    // Note that we need this to ensure FileSystem.get is thread-safe:
    // @see https://issues.apache.org/jira/browse/HDFS-925
    // @see https://mail-archives.apache.org/mod_mbox/hadoop-user/201208.mbox/%3CCA+4kjVt-QE2L83p85uELjWXiog25bYTKOZXdc1Ahun+oBSJYpQ@mail.gmail.com%3E
    conf.setBoolean("fs.hdfs.impl.disable.cache", true);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(WritableSolrRecord.class);
    conf.setNumReduceTasks(shards); // number of reducers = number of shards
    cacheSolrHome(conf, solrHomeZipName);
    JobClient.runJob(conf);
    return 0;
}
From source file:org.zuinnote.hadoop.bitcoin.example.driver.BitcoinBlockCounterDriver.java
License:Apache License
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(BitcoinBlockCounterDriver.class);
    conf.setJobName("example-hadoop-bitcoin-transactioncounter-job");
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(LongWritable.class);
    conf.setMapperClass(BitcoinBlockMap.class);
    conf.setReducerClass(BitcoinBlockReducer.class);
    conf.setInputFormat(BitcoinBlockFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    /** Set as an example some of the options to configure the Bitcoin fileformat **/
    /** Find here all configuration options: https://github.com/ZuInnoTe/hadoopcryptoledger/wiki/Hadoop-File-Format **/
    conf.set("hadoopcryptoledger.bitcoinblockinputformat.filter.magic", "F9BEB4D9");
    FileInputFormat.addInputPath(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);
}
From source file:org.zuinnote.hadoop.bitcoin.example.driver.BitcoinTransactionCounterDriver.java
License:Apache License
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(BitcoinTransactionCounterDriver.class);
    conf.setJobName("example-hadoop-bitcoin-transactioninputcounter-job");
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(LongWritable.class);
    conf.setMapperClass(BitcoinTransactionMap.class);
    conf.setReducerClass(BitcoinTransactionReducer.class);
    conf.setInputFormat(BitcoinTransactionFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.addInputPath(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);
}
From source file:pathmerge.linear.MergePathH1Driver.java
License:Apache License
public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer,
        int mergeRound, String defaultConfPath) throws IOException {
    JobConf conf = new JobConf(MergePathH1Driver.class);
    conf.setInt("sizeKmer", sizeKmer);
    if (defaultConfPath != null) {
        conf.addResource(new Path(defaultConfPath));
    }
    conf.setJobName("Initial Path-Starting-Points Table");
    conf.setMapperClass(SNodeInitialMapper.class);
    conf.setReducerClass(SNodeInitialReducer.class);
    conf.setMapOutputKeyClass(Kmer.class);
    conf.setMapOutputValueClass(MergePathValueWritable.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    String singlePointPath = "comSinglePath0";
    MultipleOutputs.addNamedOutput(conf, singlePointPath, MergePathMultiSeqOutputFormat.class,
            VKmerBytesWritable.class, MergePathValueWritable.class);
    conf.setOutputKeyClass(VKmerBytesWritable.class);
    conf.setOutputValueClass(MergePathValueWritable.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(inputPath + "stepNext"));
    conf.setNumReduceTasks(numReducers);
    FileSystem dfs = FileSystem.get(conf);
    dfs.delete(new Path(inputPath + "stepNext"), true);
    JobClient.runJob(conf);
    dfs.rename(new Path(inputPath + "stepNext" + "/" + singlePointPath),
            new Path(mergeResultPath + "/" + singlePointPath));
    int iMerge = 0;
    /*----------------------------------------------------------------------*/
    for (iMerge = 1; iMerge <= mergeRound; iMerge++) {
        // if (!dfs.exists(new Path(inputPath + "-step1")))
        //     break;
        conf = new JobConf(MergePathH1Driver.class);
        conf.setInt("sizeKmer", sizeKmer);
        conf.setInt("iMerge", iMerge);
        if (defaultConfPath != null) {
            conf.addResource(new Path(defaultConfPath));
        }
        conf.setJobName("Path Merge");
        conf.setMapperClass(MergePathH1Mapper.class);
        conf.setReducerClass(MergePathH1Reducer.class);
        conf.setMapOutputKeyClass(VKmerBytesWritable.class);
        conf.setMapOutputValueClass(MergePathValueWritable.class);
        conf.setInputFormat(SequenceFileInputFormat.class);
        String uncompSinglePath = "uncompSinglePath" + iMerge;
        String comSinglePath = "comSinglePath" + iMerge;
        String comCircle = "comCircle" + iMerge;
        MultipleOutputs.addNamedOutput(conf, uncompSinglePath, MergePathMultiSeqOutputFormat.class,
                VKmerBytesWritable.class, MergePathValueWritable.class);
        MultipleOutputs.addNamedOutput(conf, comSinglePath, MergePathMultiSeqOutputFormat.class,
                VKmerBytesWritable.class, MergePathValueWritable.class);
        MultipleOutputs.addNamedOutput(conf, comCircle, MergePathMultiSeqOutputFormat.class,
                VKmerBytesWritable.class, MergePathValueWritable.class);
        conf.setOutputKeyClass(VKmerBytesWritable.class);
        conf.setOutputValueClass(MergePathValueWritable.class);
        FileInputFormat.setInputPaths(conf, new Path(inputPath + "stepNext"));
        FileOutputFormat.setOutputPath(conf, new Path(outputPath));
        conf.setNumReduceTasks(numReducers);
        dfs.delete(new Path(outputPath), true);
        JobClient.runJob(conf);
        dfs.delete(new Path(inputPath + "stepNext"), true);
        dfs.rename(new Path(outputPath + "/" + uncompSinglePath), new Path(inputPath + "stepNext"));
        dfs.rename(new Path(outputPath + "/" + comSinglePath), new Path(mergeResultPath + "/" + comSinglePath));
        dfs.rename(new Path(outputPath + "/" + comCircle), new Path(mergeResultPath + "/" + comCircle));
    }
}
From source file:pathmerge.log.MergePathH2Driver.java
License:Apache License
public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer,
        int mergeRound, String defaultConfPath) throws IOException {
    JobConf conf = new JobConf(MergePathH2Driver.class);
    conf.setInt("sizeKmer", sizeKmer);
    if (defaultConfPath != null) {
        conf.addResource(new Path(defaultConfPath));
    }
    conf.setJobName("Initial Path-Starting-Points Table");
    conf.setMapperClass(SNodeInitialMapper.class);
    conf.setReducerClass(SNodeInitialReducer.class);
    conf.setMapOutputKeyClass(Kmer.class);
    conf.setMapOutputValueClass(MergePathValueWritable.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    String singlePointPath = "comSinglePath0";
    MultipleOutputs.addNamedOutput(conf, singlePointPath, MergePathMultiSeqOutputFormat.class,
            VKmerBytesWritable.class, MergePathValueWritable.class);
    conf.setOutputKeyClass(VKmerBytesWritable.class);
    conf.setOutputValueClass(MergePathValueWritable.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(inputPath + "stepNext"));
    conf.setNumReduceTasks(numReducers);
    FileSystem dfs = FileSystem.get(conf);
    dfs.delete(new Path(inputPath + "stepNext"), true);
    JobClient.runJob(conf);
    dfs.rename(new Path(inputPath + "stepNext" + "/" + singlePointPath),
            new Path(mergeResultPath + "/" + singlePointPath));
    int iMerge = 0;
    for (iMerge = 1; iMerge <= mergeRound; iMerge++) {
        // if (!dfs.exists(new Path(inputPath + "-step1")))
        //     break;
        conf = new JobConf(MergePathH2Driver.class);
        conf.setInt("sizeKmer", sizeKmer);
        conf.setInt("iMerge", iMerge);
        if (defaultConfPath != null) {
            conf.addResource(new Path(defaultConfPath));
        }
        conf.setJobName("Path Merge");
        conf.setMapperClass(MergePathH2Mapper.class);
        conf.setReducerClass(MergePathH2Reducer.class);
        conf.setMapOutputKeyClass(VKmerBytesWritable.class);
        conf.setMapOutputValueClass(MergePathValueWritable.class);
        conf.setInputFormat(SequenceFileInputFormat.class);
        String uncompSinglePath = "uncompSinglePath" + iMerge;
        String comSinglePath = "comSinglePath" + iMerge;
        String comCircle = "comCircle" + iMerge;
        MultipleOutputs.addNamedOutput(conf, uncompSinglePath, MergePathMultiSeqOutputFormat.class,
                VKmerBytesWritable.class, MergePathValueWritable.class);
        MultipleOutputs.addNamedOutput(conf, comSinglePath, MergePathMultiSeqOutputFormat.class,
                VKmerBytesWritable.class, MergePathValueWritable.class);
        MultipleOutputs.addNamedOutput(conf, comCircle, MergePathMultiSeqOutputFormat.class,
                VKmerBytesWritable.class, MergePathValueWritable.class);
        conf.setOutputKeyClass(VKmerBytesWritable.class);
        conf.setOutputValueClass(MergePathValueWritable.class);
        FileInputFormat.setInputPaths(conf, new Path(inputPath + "stepNext"));
        FileOutputFormat.setOutputPath(conf, new Path(outputPath));
        conf.setNumReduceTasks(numReducers);
        dfs.delete(new Path(outputPath), true);
        JobClient.runJob(conf);
        dfs.delete(new Path(inputPath + "stepNext"), true);
        dfs.rename(new Path(outputPath + "/" + uncompSinglePath), new Path(inputPath + "stepNext"));
        dfs.rename(new Path(outputPath + "/" + comSinglePath), new Path(mergeResultPath + "/" + comSinglePath));
        dfs.rename(new Path(outputPath + "/" + comCircle), new Path(mergeResultPath + "/" + comCircle));
    }
    /*
    conf = new JobConf(MergePathH2Driver.class);
    conf.setInt("sizeKmer", sizeKmer);
    conf.setInt("iMerge", iMerge);
    if (defaultConfPath != null) {
        conf.addResource(new Path(defaultConfPath));
    }
    conf.setJobName("Path Merge");
    conf.setMapperClass(MergePathH2Mapper.class);
    conf.setReducerClass(MergePathH2Reducer.class);
    conf.setMapOutputKeyClass(VKmerBytesWritable.class);
    conf.setMapOutputValueClass(MergePathValueWritable.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    String uncompSinglePath = "uncompSinglePath" + iMerge;
    String comSinglePath = "comSinglePath" + iMerge;
    String comCircle = "comCircle" + iMerge;
    MultipleOutputs.addNamedOutput(conf, uncompSinglePath, MergePathMultiTextOutputFormat.class,
            VKmerBytesWritable.class, MergePathValueWritable.class);
    MultipleOutputs.addNamedOutput(conf, comSinglePath, MergePathMultiTextOutputFormat.class,
            VKmerBytesWritable.class, MergePathValueWritable.class);
    MultipleOutputs.addNamedOutput(conf, comCircle, MergePathMultiTextOutputFormat.class,
            VKmerBytesWritable.class, MergePathValueWritable.class);
    conf.setOutputKeyClass(VKmerBytesWritable.class);
    conf.setOutputValueClass(MergePathValueWritable.class);
    FileInputFormat.setInputPaths(conf, new Path(inputPath + "stepNext"));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    conf.setNumReduceTasks(numReducers);
    dfs.delete(new Path(outputPath), true);
    JobClient.runJob(conf);
    dfs.delete(new Path(inputPath + "stepNext"), true);
    dfs.rename(new Path(outputPath + "/" + uncompSinglePath), new Path(inputPath + "stepNext"));
    dfs.rename(new Path(outputPath + "/" + comSinglePath), new Path(mergeResultPath + "/" + comSinglePath));
    dfs.rename(new Path(outputPath + "/" + comCircle), new Path(mergeResultPath + "/" + comCircle));
    */
}
From source file:PDI.Hadoop.Datamining.Tools.HistorianParser.java
/**
 * The main driver for historian map/reduce program. Invoke this method to
 * submit the map/reduce job.
 *
 * @throws IOException
 *             When there is communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), HistorianParser.class);
    JobClient jobClient = new JobClient(conf);
    List<String> sourcePaths = new ArrayList<String>();
    String destPath = "";
    String currentDate = DateUtils.getCurrentDateString();
    String startTS = "";
    String endTS = "";
    String pointIDS = "";
    String outputSize = "";
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(StandardPointFile.class);
    conf.setMapperClass(MapClass.class);
    conf.setReducerClass(ReduceClass.class);
    conf.setInputFormat(HistorianInputFormat.class);
    conf.set("compression", "no");
    conf.set("filePrefix", "devarchive_archive_");
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-startTS".equals(args[i])) {
                conf.set("startTS", args[++i]);
                startTS = args[i];
            } else if ("-endTS".equals(args[i])) {
                conf.set("endTS", args[++i]);
                endTS = args[i];
            } else if ("-pointIDS".equals(args[i])) {
                conf.set("pointIDS", args[++i]);
                pointIDS = args[i];
            } else if ("-outputMaxSize".equals(args[i])) {
                conf.set("outputSize", args[++i]);
                outputSize = args[i];
            } else if ("-sourcePATH".equals(args[i])) {
                String sourcePath = "" + args[++i];
                if (sourcePath.indexOf(',') == -1) {
                    sourcePaths.add(sourcePath);
                } else {
                    String[] paths = sourcePath.split(",");
                    for (int ii = 0; ii < paths.length; ii++) {
                        sourcePaths.add(paths[ii]);
                    }
                }
            } else if ("-destPATH".equals(args[i])) {
                destPath = "" + args[++i] + "/";
            } else if ("-compression".equals(args[i])) {
                conf.set("compression", args[++i]);
            } else if ("-filePrefix".equals(args[i])) {
                conf.set("filePrefix", args[++i]);
            } else if ("-v".equals(args[i])) {
                pdi_showVersion();
                return 0;
            } else if ("-verbose".equals(args[i])) {
                this.pdi_setVerbose(true);
            } else if ("-h".equals(args[i])) {
                return printUsage();
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Check for the user input parameters
    if ((0 == sourcePaths.size()) || destPath.equals("") || startTS.equals("") || endTS.equals("")
            || pointIDS.equals("") || outputSize.equals("") || (0 == conf.get("filePrefix").length())) {
        System.out.println("ERROR: Wrong input parameters.");
        return printUsage();
    }
    String startTime = DateUtils.unixTimestampToHumanReadableTime2(startTS);
    String endTime = DateUtils.unixTimestampToHumanReadableTime2(endTS);
    System.out.println("-------------------------------------------------------");
    System.out.println("jobName : " + currentDate);
    System.out.println("filePrefix : " + conf.get("filePrefix"));
    for (int i = 0; i < sourcePaths.size(); i++) {
        System.out.println("sourcePath[" + i + "]: " + sourcePaths.get(i));
    }
    System.out.println("destPath : " + destPath);
    System.out.println("startTS : " + startTS + " (" + startTime + ")");
    System.out.println("endTS : " + endTS + " (" + endTime + ")");
    System.out.println("pointIDS : " + pointIDS);
    System.out.println("outputMaxSize: " + outputSize + " MB");
    System.out.println("compression : " + conf.get("compression"));
    System.out.println("-------------------------------------------------------");
    PathUtils utils = new PathUtils(this.pdi_isVerbose());
    if (false == utils.pdi_setRecursiveInputPaths(conf, sourcePaths, startTS, endTS)) {
        return -1;
    }
    // set output path to current time
    FileOutputFormat.setOutputPath(conf, utils.getOutputPath(destPath, currentDate));
    // set jobName to current time
    // conf.setJobName(date.toString());
    conf.setJobName(currentDate);
    JobClient.runJob(conf); // run the job
    // mergeAndCopyToLocal(conf, destPath);
    return 0;
}
From source file:pegasus.heigen.SaxpyTextoutput.java
License:Apache License
protected JobConf configSaxpyTextoutput(Path py, Path px, Path saxpy_output, double a) throws Exception {
    final JobConf conf = new JobConf(getConf(), SaxpyTextoutput.class);
    conf.set("y_path", py.getName());
    conf.set("x_path", px.getName());
    conf.set("a", "" + a);
    conf.setJobName("SaxpyTextoutput");
    conf.setMapperClass(SaxpyTextoutput.MapStage1.class);
    conf.setReducerClass(SaxpyTextoutput.RedStage1.class);
    FileInputFormat.setInputPaths(conf, py, px);
    FileOutputFormat.setOutputPath(conf, saxpy_output);
    conf.setNumReduceTasks(nreducers);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(DoubleWritable.class);
    conf.setOutputValueClass(Text.class);
    return conf;
}